Source code for solidipes_core_plugin.downloaders.dspace7

import os

import requests
from solidipes.downloaders.downloader import Downloader
from solidipes.scripts.init import create_solidipes_directory
from solidipes.utils.utils import DataRepositoryException, set_study_metadata

from ..utils.dspace7_utils import check_response, download_files, get_host_and_id


class Dspace7Downloader(Downloader):
    """Downloader command that retrieves a study from a Dspace7 server.

    The actual work is delegated to the module-level :func:`main`.
    """

    # Command names this downloader is registered under
    # (presumably the first is the primary name and the rest aliases — confirm
    # against the ``Downloader`` base class).
    command = ["dspace7", "infoscience", "boris"]
    command_help = "Download study from Dspace7"

    def download(self, args):
        """Download the study described by the parsed arguments ``args``."""
        main(args)

    def populate_arg_parser(self, parser):
        """Set the description of ``parser`` and declare this command's arguments.

        Declares the positional ``identifier``, the optional positional
        ``destination`` (empty string when omitted) and the ``--only-metadata``
        flag.
        """
        parser.description = self.command_help

        parser.add_argument(
            "identifier",
            help="URL or DOI of the study to download",
        )

        destination_options = {
            "nargs": "?",
            "default": "",
            "help": "Path to the destination folder. If not specified, defaults to the study numeric ID.",
        }
        parser.add_argument("destination", **destination_options)

        parser.add_argument(
            "--only-metadata",
            action="store_true",
            help="Only download metadata (overrides destination directory's metadata!)",
        )
def main(args):
    """Download content from Dspace7.

    Retrieves the record of the study designated by ``args.identifier``,
    creates the destination folder and its Solidipes directory, writes the
    study metadata and, unless ``args.only_metadata`` is set, downloads the
    files of the record.

    Args:
        args: Parsed command-line arguments providing ``identifier``,
            ``destination`` and ``only_metadata``. When ``destination`` is
            empty it is overwritten with the study ID.

    Returns:
        None. A ``DataRepositoryException`` is printed rather than raised.

    Raises:
        Exception: Any error other than ``DataRepositoryException`` (for
            instance a ``requests`` connection error or timeout) propagates
            unchanged to the caller.
    """
    # Imported locally, as in the original code (presumably to keep module
    # import light or to avoid an import cycle — confirm before moving it).
    from solidipes.utils.metadata import dc_to_solidipes

    try:
        host, study_id = get_host_and_id(args.identifier)
        url = f"https://{host}/server/api/core/items/{study_id}?embed=bundles/bitstreams"

        # Scan record. A timeout is required: without one, requests waits
        # forever on an unresponsive server.
        response = requests.get(url, timeout=60)
        check_response(response, 200, "retrieve record")
        record = response.json()
        print(f"Retrieving study {study_id} from {host}...")

        # Create destination folder if it does not exist
        if not args.destination:
            args.destination = study_id
        # exist_ok avoids the race between an existence check and the creation.
        os.makedirs(args.destination, exist_ok=True)

        # Create Solidipes directory if it does not exist
        try:
            create_solidipes_directory(args.destination)
        except FileExistsError:
            pass

        # Save metadata in YAML file
        print("Saving metadata...")
        metadata = process_metadata(dc_to_solidipes(record["metadata"]))
        # Keep the original record metadata, tagged with where it came from.
        # NOTE: this is the same dict object as record["metadata"], so the
        # tags below are also visible through ``record``.
        metadata["zz_orig_metadata"] = record["metadata"]
        metadata["zz_orig_metadata"]["00solidipes_platform"] = "dspace7"
        metadata["zz_orig_metadata"]["00solidipes_host"] = host
        metadata["zz_orig_metadata"]["00solidipes_study_id"] = study_id
        set_study_metadata(metadata, initial_path=args.destination)

        if args.only_metadata:
            return

        download_files(record, destination=args.destination, progressbar=True)

    except DataRepositoryException as e:
        # Repository errors are reported to the user; every other exception
        # is left to propagate with its original traceback.
        print(e)
def process_metadata(metadata):
    """Process metadata to make dataset uploadable again.

    The dictionary is modified in place and the same object is returned.

    Transformations applied:

    * ``upload_type`` is filled in when absent: from
      ``resource_type["type"]`` (the ``resource_type`` entry is then removed)
      or, failing that, with ``"dataset"``.
    * ``journal`` is flattened into ``journal_title``, ``journal_volume``,
      ``journal_issue`` and ``journal_pages`` (only the fields present), and
      the ``journal`` entry is removed.
    * ``license`` is replaced by its lower-cased ``id``; the entry is removed
      when it has no (or an empty) ``id``.
    * Entries of ``related_identifiers`` whose ``relation`` is
      ``"isVersionOf"`` are dropped.

    Args:
        metadata: Mapping of study metadata.

    Returns:
        The ``metadata`` mapping that was passed in.
    """
    # TODO ignoring this for the moment
    if "upload_type" not in metadata:
        if "resource_type" in metadata:
            metadata["upload_type"] = metadata["resource_type"]["type"]
            del metadata["resource_type"]
        else:
            metadata["upload_type"] = "dataset"

    if "journal" in metadata:
        journal = metadata["journal"]
        for field in ["title", "volume", "issue", "pages"]:
            if field in journal:
                metadata[f"journal_{field}"] = journal[field]
        del metadata["journal"]

    if "license" in metadata:
        license_type = metadata["license"].get("id")
        if license_type:
            metadata["license"] = license_type.lower()
        else:
            del metadata["license"]

    # Filter with a slice assignment instead of calling ``remove`` inside the
    # loop: removing while iterating skips the element that follows each
    # removal, so consecutive "isVersionOf" entries used to survive. The
    # slice assignment keeps the list object stored in ``metadata``.
    related_identifiers = metadata.get("related_identifiers") or []
    related_identifiers[:] = [
        related
        for related in related_identifiers
        if related.get("relation") != "isVersionOf"
    ]

    return metadata