import os
import requests
from solidipes.downloaders.downloader import Downloader
from solidipes.scripts.init import create_solidipes_directory
from solidipes.utils.utils import DataRepositoryException, set_study_metadata
from ..utils.dspace7_utils import check_response, download_files, get_host_and_id
[docs]
class Dspace7Downloader(Downloader):
    """Downloader that fetches a study from a Dspace7 repository."""

    # Command names associated with this downloader.
    command = ["dspace7", "infoscience", "boris"]
    command_help = "Download study from Dspace7"

    def download(self, args):
        """Run the Dspace7 download with the parsed arguments ``args``."""
        main(args)

    def populate_arg_parser(self, parser):
        """Declare the arguments of this downloader on ``parser``.

        Adds the positional ``identifier``, the optional positional
        ``destination`` (empty string when omitted) and the
        ``--only-metadata`` flag, and sets the parser description.
        """
        parser.description = self.command_help

        parser.add_argument(
            "identifier",
            help="URL or DOI of the study to download",
        )

        destination_help = "Path to the destination folder. If not specified, defaults to the study numeric ID."
        parser.add_argument("destination", default="", nargs="?", help=destination_help)

        only_metadata_help = "Only download metadata (overrides destination directory's metadata!)"
        parser.add_argument("--only-metadata", action="store_true", help=only_metadata_help)
[docs]
def main(args):
    """Download content from Dspace7.

    Retrieves the record of the study designated by ``args.identifier``,
    prepares the destination folder, stores the converted metadata there and,
    unless ``args.only_metadata`` is set, downloads the study files.

    Args:
        args: Parsed arguments exposing ``identifier``, ``destination`` and
            ``only_metadata``. When ``destination`` is empty it is set, on
            ``args`` itself, to the study ID.

    Returns:
        None.

    Raises:
        Exception: Any error other than ``DataRepositoryException``
            propagates to the caller (including a ``requests`` timeout when
            the server does not answer). A ``DataRepositoryException`` is
            printed and swallowed.
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid an import cycle -- TODO confirm).
    from solidipes.utils.metadata import dc_to_solidipes

    # Seconds to wait for the server (connection, then each read) before
    # giving up; without a timeout a stalled server would block forever.
    request_timeout = 60

    try:
        host, study_id = get_host_and_id(args.identifier)
        url = f"https://{host}/server/api/core/items/{study_id}?embed=bundles/bitstreams"

        # Scan record
        response = requests.get(url, timeout=request_timeout)
        check_response(response, 200, "retrieve record")
        record = response.json()
        print(f"Retrieving study {study_id} from {host}...")

        # Create destination folder if it does not exist
        if not args.destination:
            args.destination = study_id
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(args.destination, exist_ok=True)

        # Create Solidipes directory if it does not exist
        try:
            create_solidipes_directory(args.destination)
        except FileExistsError:
            # An already initialised destination is reused as-is.
            pass

        # Save metadata in YAML file
        print("Saving metadata...")
        metadata = process_metadata(dc_to_solidipes(record["metadata"]))
        # Keep the original record metadata, tagged with where it came from.
        # NOTE: this is the same dict object as record["metadata"], so the
        # tags below are also visible through ``record``.
        metadata["zz_orig_metadata"] = record["metadata"]
        metadata["zz_orig_metadata"]["00solidipes_platform"] = "dspace7"
        metadata["zz_orig_metadata"]["00solidipes_host"] = host
        metadata["zz_orig_metadata"]["00solidipes_study_id"] = study_id
        set_study_metadata(metadata, initial_path=args.destination)

        if args.only_metadata:
            return

        download_files(record, destination=args.destination, progressbar=True)

    except DataRepositoryException as e:
        # Repository-side problems are reported to the user, not raised;
        # every other exception type propagates untouched.
        print(e)