Skip to content

Commit

Permalink
feat(dcm2pdf): add read-dicom-encapsulated-pdf operation to itk-dicom
Browse files Browse the repository at this point in the history
Copy dcmtk/dcmdata/apps/dcm2pdf.cc (InsightSoftwareConsortium/DCMTK@2dd1815)
into itk-wasm as a command line application at
src/io/internal/pipelines/dicom/read-dicom-encapsulated-pdf.cxx

This first commit copies the original dcmtk file as-is, in order to
document/track the deviation from its original form
as it is made compatible with CLI::App library and itk-wasm in general.
  • Loading branch information
jadh4v committed Sep 27, 2022
1 parent 11cacdb commit 2a383c8
Show file tree
Hide file tree
Showing 2 changed files with 354 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/io/internal/pipelines/dicom/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ set(wasm_modules )
# diquant.cc required for odd linker error
add_executable(structured-report-to-text structured-report-to-text.cxx diquant.cc)
add_executable(structured-report-to-html structured-report-to-html.cxx diquant.cc)
add_executable(read-dicom-encapsulated-pdf read-dicom-encapsulated-pdf.cxx)
target_link_libraries(structured-report-to-text PUBLIC ${ITK_LIBRARIES})
target_link_libraries(structured-report-to-html PUBLIC ${ITK_LIBRARIES})
target_link_libraries(read-dicom-encapsulated-pdf PUBLIC ${ITK_LIBRARIES})
list(APPEND wasm_modules "structured-report-to-text")
list(APPEND wasm_modules "structured-report-to-html")
list(APPEND wasm_modules "read-dicom-encapsulated-pdf")


if (WASI AND DEFINED WebAssemblyInterface_BINARY_DIR)
Expand Down
351 changes: 351 additions & 0 deletions src/io/internal/pipelines/dicom/read-dicom-encapsulated-pdf.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
/*
*
* Copyright (C) 2007-2021, OFFIS e.V.
* All rights reserved. See COPYRIGHT file for details.
*
* This software and supporting documentation were developed by
*
* OFFIS e.V.
* R&D Division Health
* Escherweg 2
* D-26121 Oldenburg, Germany
*
*
* Module: dcmdata
*
* Author: Marco Eichelberg
*
* Purpose: Exctract PDF file from DICOM encapsulated PDF storage object
*
*/

#include "dcmtk/config/osconfig.h" /* make sure OS specific configuration is included first */

BEGIN_EXTERN_C
#ifdef HAVE_FCNTL_H
#include <fcntl.h> /* for O_RDONLY */
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> /* required for sys/stat.h */
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h> /* for stat, fstat */
#endif
END_EXTERN_C

#include "dcmtk/dcmdata/dctk.h"
#include "dcmtk/dcmdata/cmdlnarg.h"
#include "dcmtk/ofstd/ofconapp.h"
#include "dcmtk/dcmdata/dcuid.h" /* for dcmtk version name */
#include "dcmtk/ofstd/ofstd.h"
#include "dcmtk/dcmdata/dcistrmz.h" /* for dcmZlibExpectRFC1950Encoding */

#ifdef WITH_ZLIB
#include "itk_zlib.h" /* for zlibVersion() */
#endif

#define OFFIS_CONSOLE_APPLICATION "dcm2pdf"

static OFLogger dcm2pdfLogger = OFLog::getLogger("dcmtk.apps." OFFIS_CONSOLE_APPLICATION);

static char rcsid[] = "$dcmtk: " OFFIS_CONSOLE_APPLICATION " v"
OFFIS_DCMTK_VERSION " " OFFIS_DCMTK_RELEASEDATE " $";

#define FILENAME_PLACEHOLDER "#f"

static OFString replaceChars(const OFString &srcstr, const OFString &pattern, const OFString &substitute)
/*
* This function replaces all occurrences of pattern in srcstr with substitute and returns
* the result as a new OFString variable. Note that srcstr itself will not be changed.
*
* Parameters:
* srcstr - [in] The source string.
* pattern - [in] The pattern string which shall be substituted.
* substitute - [in] The substitute for pattern in srcstr.
*/
{
OFString result = srcstr;
size_t pos = 0;

while (pos != OFString_npos)
{
pos = result.find(pattern, pos);

if (pos != OFString_npos)
{
result.replace(pos, pattern.size(), substitute);
pos += substitute.size();
}
}

return result;
}


#define SHORTCOL 3
#define LONGCOL 20

int main(int argc, char *argv[])
{
const char *opt_ifname = NULL;
const char *opt_ofname = NULL;
const char *opt_execString = NULL;
E_FileReadMode opt_readMode = ERM_autoDetect;
E_TransferSyntax opt_ixfer = EXS_Unknown;

OFConsoleApplication app(OFFIS_CONSOLE_APPLICATION, "Extract PDF file from DICOM encapsulated PDF", rcsid);
OFCommandLine cmd;
cmd.setOptionColumns(LONGCOL, SHORTCOL);
cmd.setParamColumn(LONGCOL + SHORTCOL + 4);

cmd.addParam("dcmfile-in", "DICOM input filename");
cmd.addParam("pdffile-out", "PDF output filename");

cmd.addGroup("general options:", LONGCOL, SHORTCOL + 2);
cmd.addOption("--help", "-h", "print this help text and exit", OFCommandLine::AF_Exclusive);
cmd.addOption("--version", "print version information and exit", OFCommandLine::AF_Exclusive);
OFLog::addOptions(cmd);

cmd.addGroup("input options:");
cmd.addSubGroup("input file format:");
cmd.addOption("--read-file", "+f", "read file format or data set (default)");
cmd.addOption("--read-file-only", "+fo", "read file format only");
cmd.addOption("--read-dataset", "-f", "read data set without file meta information");
cmd.addSubGroup("input transfer syntax:", LONGCOL, SHORTCOL);
cmd.addOption("--read-xfer-auto", "-t=", "use TS recognition (default)");
cmd.addOption("--read-xfer-detect", "-td", "ignore TS specified in the file meta header");
cmd.addOption("--read-xfer-little", "-te", "read with explicit VR little endian TS");
cmd.addOption("--read-xfer-big", "-tb", "read with explicit VR big endian TS");
cmd.addOption("--read-xfer-implicit", "-ti", "read with implicit VR little endian TS");
cmd.addSubGroup("parsing of odd-length attributes:");
cmd.addOption("--accept-odd-length", "+ao", "accept odd length attributes (default)");
cmd.addOption("--assume-even-length", "+ae", "assume real length is one byte larger");
cmd.addSubGroup("handling of undefined length UN elements:");
cmd.addOption("--enable-cp246", "+ui", "read undefined len UN as implicit VR (default)");
cmd.addOption("--disable-cp246", "-ui", "read undefined len UN as explicit VR");
cmd.addSubGroup("handling of defined length UN elements:");
cmd.addOption("--retain-un", "-uc", "retain elements as UN (default)");
cmd.addOption("--convert-un", "+uc", "convert to real VR if known");
cmd.addSubGroup("automatic data correction:");
cmd.addOption("--enable-correction", "+dc", "enable automatic data correction (default)");
cmd.addOption("--disable-correction", "-dc", "disable automatic data correction");
#ifdef WITH_ZLIB
cmd.addSubGroup("bitstream format of deflated input:");
cmd.addOption("--bitstream-deflated", "+bd", "expect deflated bitstream (default)");
cmd.addOption("--bitstream-zlib", "+bz", "expect deflated zlib bitstream");
#endif

cmd.addGroup("execution options:", LONGCOL, SHORTCOL + 2);
cmd.addOption("--exec", "-x", 1, "[c]ommand: string",
"execute command c after PDF extraction");
/* evaluate command line */
prepareCmdLineArgs(argc, argv, OFFIS_CONSOLE_APPLICATION);
if (app.parseCommandLine(cmd, argc, argv))
{
/* check exclusive options first */
if (cmd.hasExclusiveOption())
{
if (cmd.findOption("--version"))
{
app.printHeader(OFTrue /*print host identifier*/);
COUT << OFendl << "External libraries used:";
#ifdef WITH_ZLIB
COUT << OFendl << "- ZLIB, Version " << zlibVersion() << OFendl;
#else
COUT << " none" << OFendl;
#endif
return 0;
}
}

/* command line parameters and options */
cmd.getParam(1, opt_ifname);
cmd.getParam(2, opt_ofname);

OFLog::configureFromCommandLine(cmd, app);

cmd.beginOptionBlock();
if (cmd.findOption("--read-file")) opt_readMode = ERM_autoDetect;
if (cmd.findOption("--read-file-only")) opt_readMode = ERM_fileOnly;
if (cmd.findOption("--read-dataset")) opt_readMode = ERM_dataset;
cmd.endOptionBlock();

cmd.beginOptionBlock();
if (cmd.findOption("--read-xfer-auto"))
opt_ixfer = EXS_Unknown;
if (cmd.findOption("--read-xfer-detect"))
dcmAutoDetectDatasetXfer.set(OFTrue);
if (cmd.findOption("--read-xfer-little"))
{
app.checkDependence("--read-xfer-little", "--read-dataset", opt_readMode == ERM_dataset);
opt_ixfer = EXS_LittleEndianExplicit;
}
if (cmd.findOption("--read-xfer-big"))
{
app.checkDependence("--read-xfer-big", "--read-dataset", opt_readMode == ERM_dataset);
opt_ixfer = EXS_BigEndianExplicit;
}
if (cmd.findOption("--read-xfer-implicit"))
{
app.checkDependence("--read-xfer-implicit", "--read-dataset", opt_readMode == ERM_dataset);
opt_ixfer = EXS_LittleEndianImplicit;
}
cmd.endOptionBlock();

cmd.beginOptionBlock();
if (cmd.findOption("--accept-odd-length"))
{
dcmAcceptOddAttributeLength.set(OFTrue);
}
if (cmd.findOption("--assume-even-length"))
{
dcmAcceptOddAttributeLength.set(OFFalse);
}
cmd.endOptionBlock();

cmd.beginOptionBlock();
if (cmd.findOption("--enable-cp246"))
{
dcmEnableCP246Support.set(OFTrue);
}
if (cmd.findOption("--disable-cp246"))
{
dcmEnableCP246Support.set(OFFalse);
}
cmd.endOptionBlock();

cmd.beginOptionBlock();
if (cmd.findOption("--retain-un"))
{
dcmEnableUnknownVRConversion.set(OFFalse);
}
if (cmd.findOption("--convert-un"))
{
dcmEnableUnknownVRConversion.set(OFTrue);
}
cmd.endOptionBlock();

cmd.beginOptionBlock();
if (cmd.findOption("--enable-correction"))
{
dcmEnableAutomaticInputDataCorrection.set(OFTrue);
}
if (cmd.findOption("--disable-correction"))
{
dcmEnableAutomaticInputDataCorrection.set(OFFalse);
}
cmd.endOptionBlock();

#ifdef WITH_ZLIB
cmd.beginOptionBlock();
if (cmd.findOption("--bitstream-deflated"))
{
dcmZlibExpectRFC1950Encoding.set(OFFalse);
}
if (cmd.findOption("--bitstream-zlib"))
{
dcmZlibExpectRFC1950Encoding.set(OFTrue);
}
cmd.endOptionBlock();
#endif

if (cmd.findOption("--exec")) app.checkValue(cmd.getValue(opt_execString));
}

/* print resource identifier */
OFLOG_DEBUG(dcm2pdfLogger, rcsid << OFendl);

/* make sure data dictionary is loaded */
if (!dcmDataDict.isDictionaryLoaded())
{
OFLOG_WARN(dcm2pdfLogger, "no data dictionary loaded, check environment variable: "
<< DCM_DICT_ENVIRONMENT_VARIABLE);
}

// open inputfile
if ((opt_ifname == NULL) || (strlen(opt_ifname) == 0))
{
OFLOG_FATAL(dcm2pdfLogger, "invalid filename: <empty string>");
return 1;
}

DcmFileFormat fileformat;
DcmDataset * dataset = fileformat.getDataset();

OFLOG_INFO(dcm2pdfLogger, "open input file " << opt_ifname);

OFCondition error = fileformat.loadFile(opt_ifname, opt_ixfer, EGL_noChange, DCM_MaxReadLength, opt_readMode);

if (error.bad())
{
OFLOG_FATAL(dcm2pdfLogger, error.text() << ": reading file: " << opt_ifname);
return 1;
}

OFString sopClass;
error = dataset->findAndGetOFString(DCM_SOPClassUID, sopClass);
if (error.bad() || sopClass != UID_EncapsulatedPDFStorage)
{
OFLOG_FATAL(dcm2pdfLogger, "not an Encapsulated PDF Storage object: " << opt_ifname);
return 1;
}

DcmElement *delem = NULL;
error = dataset->findAndGetElement(DCM_EncapsulatedDocument, delem);
if (error.bad() || delem == NULL)
{
OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document missing.");
return 1;
}

Uint32 len = delem->getLength();
Uint8 *pdfDocument = NULL;
error = delem->getUint8Array(pdfDocument);
if (error.bad() || pdfDocument == NULL || len == 0)
{
OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document empty or wrong VR.");
return 1;
}

/* strip pad byte at end of file, if there is one. The PDF format expects
* files to end with %%EOF followed by CR/LF (although in some cases the
* CR/LF may be missing or you might only find CR or LF).
* If the last character of the file is not a CR or LF, and not the
* letter 'F', we assume it is either trailing garbage or a pad byte, and remove it.
*/
if (pdfDocument[len-1] != 10 && pdfDocument[len-1] != 13 && pdfDocument[len-1] != 'F')
{
--len;
}

FILE *pdffile = fopen(opt_ofname, "wb");
if (pdffile == NULL)
{
OFLOG_FATAL(dcm2pdfLogger, "unable to create file " << opt_ofname);
return 1;
}

if (len != fwrite(pdfDocument, 1, len, pdffile))
{
OFLOG_FATAL(dcm2pdfLogger, "write error in file " << opt_ofname);
fclose(pdffile);
return 1;
}

fclose(pdffile);

OFLOG_INFO(dcm2pdfLogger, "conversion successful");

if (opt_execString)
{
OFString cmdStr = opt_execString;
cmdStr = replaceChars(cmdStr, OFString(FILENAME_PLACEHOLDER), opt_ofname);

// Execute command and return result
#ifndef __wasi__
return system(cmdStr.c_str());
#endif
}

return 0;
}

0 comments on commit 2a383c8

Please sign in to comment.