Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use tuque to access Fedora objects and migrate all datastreams #6

Merged
merged 3 commits into from
Aug 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"name": "islandora/migrate_7x_claw",
"description": "Tools to assist in Islandora 7.x to CLAW migrations.",
"type": "drupal-module",
"homepage": "https://github.com/Islandora-Devops/migrate_7x_claw",
"support": {
"issues": "https://github.com/Islandora-CLAW/CLAW/issues",
"irc": "irc://irc.freenode.org/islandora",
"source": "https://github.com/Islandora-Devops/migrate_7x_claw"
},
"license": "GPL-2.0-or-later",
"repositories": [
{
"type": "composer",
"url": "https://packages.drupal.org/8"
}
],
"require": {
"drupal/migrate_plus": "^4",
"jonathangreen/tuque": "dev-master"
},
"require-dev": {
"phpunit/phpunit": "^6",
"squizlabs/php_codesniffer": "2.7.1",
"drupal/coder": "*",
"sebastian/phpcpd": "*"
},
"authors": [
{
"name": "Islandora Foundation",
"email": "[email protected]",
"role": "Owner"
},
{
"name": "Daniel Lamb",
"email": "[email protected]",
"role": "Maintainer"
},
{
"name": "Jared Whiklo",
"email": "[email protected]",
"role": "Developer"
}
],
"minimum-stability": "dev"
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ source:
solr_base_url: http://10.0.2.2:9080/solr
# Base url of your Fedora 3 instance
fedora_base_url: &fedora_base_url http://10.0.2.2:9080/fedora
# One of 'object' or 'datastreams', affects the count.
islandora_type: datastreams
# Required when islandora_type is 'datastreams': the Solr field in which the datastreams are indexed.
datastream_solr_field: fedora_datastreams_ms

# define a http data fetcher to access the remote sites
data_fetcher_plugin: http
Expand All @@ -24,7 +28,7 @@ source:
content_model: islandora:sp_basic_image

# Fedora requires the authentication you defined above to access the objectXML, so use this plugin instead of xml
data_parser_plugin: authenticated_xml
data_parser_plugin: tuque_datastreams
# This grabs the root of an object as we only get one object for each XML.
item_selector: /foxml:digitalObject

Expand All @@ -33,19 +37,31 @@ source:
extension: 'jpg'
fedora_base_url: *fedora_base_url
objects_string: 'objects'
datastreams_string: 'datastreams'
content_string: 'content'
obj_contents_suffix: 'datastreams/OBJ/content'
creator_uid: 1

fields:
-
name: PID
label: 'PID'
selector: '@PID'
selector: 'ID'
-
name: mimetype
label: 'Mimetype'
selector: 'mimetype'
-
name: DSID
label: 'Datastream ID'
selector: 'DSID'

# This is generated by the tuque_datastreams dataParser
ids:
PID:
PID_DSID:
type: string


process:

# Bundle type.
Expand All @@ -56,27 +72,44 @@ process:
# Turns 'islandora:5' into 'islandora_5'
digital_id:
plugin: str_replace
source: PID
source: PID_DSID
search: ':'
replace: '_'

# Turns 'islandora:5' into 'http://10.0.2.2:9080/fedora/object/islandora:5/OBJ/contents'
# Turns 'islandora:5' into 'http://10.0.2.2:9080/fedora/objects/islandora:5/datastreams/OBJ/content'
remote_path:
plugin: concat
delimiter: /
source:
- constants/fedora_base_url
- constants/objects_string
- PID
- constants/obj_contents_suffix
- constants/datastreams_string
- DSID
- constants/content_string

# Determine a file extension based on the MIME type. TODO: there should be a better way to do this.
extension:
plugin: static_map
source: mimetype
map:
text/xml: 'xml'
application/xml: 'xml'
application/rdf+xml: 'xml'
image/jpeg: 'jpg'
image/tiff: 'tiff'
image/tif: 'tiff'
image/jpg: 'jpg'
image/png: 'png'
image/gif: 'gif'

# Turns 'islandora_5' into 'islandora_5.jpg'
filename:
plugin: concat
delimiter: '.'
source:
- '@digital_id'
- constants/extension
- '@extension'

# Turns 'islandora_5.jpg' into 'public://masters/islandora_5.jpg'
local_path:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ source:
solr_base_url: http://10.0.2.2:9080/solr
# Base url of your Fedora 3 instance
fedora_base_url: &fedora_base_url http://10.0.2.2:9080/fedora
# One of 'object' or 'datastreams', affects the count.
islandora_type: datastreams
# Required when islandora_type is 'datastreams': the Solr field in which the datastreams are indexed.
datastream_solr_field: fedora_datastreams_ms

# define a http data fetcher to access the remote sites
data_fetcher_plugin: http
Expand All @@ -28,36 +32,52 @@ source:
content_model: islandora:sp_basic_image

# Fedora requires the authentication you defined above to access the objectXML, so use this plugin instead of xml
data_parser_plugin: authenticated_xml
data_parser_plugin: tuque_datastreams
# This grabs the root of an object as we only get one object for each XML.
item_selector: /foxml:digitalObject

constants:
# Tag for this media.
# Tag for OBJ.
preservation_master: 'Preservation Master'
other_tag: 'Datastream'
# UID of the author.
creator_uid: 1

fields:
-
name: PID
label: 'PID'
selector: '@PID'
selector: 'PID'
-
name: DSID
label: 'Datastream ID'
selector: 'DSID'
-
name: mimetype
label: "MimeType"
selector: 'foxml:datastream[@ID = "OBJ" and @CONTROL_GROUP = "M"]/foxml:datastreamVersion[position() = last()]/@MIMETYPE'
label: 'Mimetype'
selector: 'mimetype'
-
name: size
label: 'Datastream size'
selector: 'size'
-
name: format
label: 'Format'
selector: 'format'
-
name: filesize
label: 'File size'
selector: 'foxml:datastream[@ID = "OBJ" and @CONTROL_GROUP = "M"]/foxml:datastreamVersion[position() = last()]/@SIZE'
name: checksum
label: 'Checksum'
selector: 'checksum'

ids:
PID:
PID_DSID:
type: string

process:

# Set author.
uid: constants/creator_uid

# Add mimetype if available.
field_mime_type:
plugin: skip_on_empty
Expand All @@ -74,16 +94,31 @@ process:
field_media_image/target_id:
plugin: migration_lookup
migration: islandora_basic_image_files
source: PID
source: PID_DSID
no_stub: true
# If it's a file, we need to link this field.
field_media_file/target_id:
plugin: migration_lookup
migration: islandora_basic_image_files
source: PID_DSID
no_stub: true

# Set the display profile.
field_media_image/display:
plugin: default_value
default_value: 1
field_media_file/display:
plugin: default_value
default_value: 1

# Set the description.
field_media_image/description:
plugin: default_value
default_value: ''
field_media_file/description:
plugin: default_value
default_value: ''


# Lookup the Repository object we just created
field_media_of:
Expand All @@ -92,22 +127,37 @@ process:
source: PID
no_stub: true

# Set as Preservation Master
# Tag as Preservation Master or Datastream
field_tags:
plugin: entity_lookup
source: constants/preservation_master
value_key: name
bundle_key: vid
bundle: tags
entity_type: taxonomy_term
ignore_case: true

# Set author.
uid: constants/creator_uid
-
plugin: static_map
source: DSID
map:
OBJ: 'Preservation Master'
default_value: 'Datastream'
-
plugin: entity_lookup
value_key: name
bundle_key: vid
bundle: tags
entity_type: taxonomy_term
ignore_case: true

bundle:
plugin: static_map
source: mimetype
map:
image/jpeg: 'image'
image/tiff: 'image'
image/tif: 'image'
image/jpg: 'image'
image/png: 'image'
image/gif: 'image'
default_value: 'file'

destination:
plugin: 'entity:media'
default_bundle: image


# Enforced dependencies means this migration is removed if any of these modules are uninstalled.
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion migrate_7x_claw.info.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ package: Islandora
core: 8.x
dependencies:
- drupal:migrate
- migrate_plus:migrate_plus
- migrate_plus:migrate_plus
Loading