# Copyright (C) 2020 Stuart Swerdloff, Simon Biggs
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import json
import logging
import pathlib
from os.path import abspath, basename, dirname, isdir, isfile
from os.path import join as pjoin
from pymedphys._imports import pydicom
from pymedphys._dicom.anonymise import (
anonymise_dataset,
anonymise_directory,
anonymise_file,
get_baseline_keyword_vr_dict,
get_default_identifying_keywords,
)
from pymedphys._dicom.anonymise import strategy as anon_strategy
from . import strategy
# Absolute path of the directory that contains this module.
HERE = dirname(abspath(__file__))
# JSON file stored next to this module; it is opened and parsed by
# _get_default_identifying_uids().
IDENTIFYING_UIDS_FILEPATH = pjoin(HERE, "identifying_uids.json")
@functools.lru_cache()
def _get_default_identifying_uids():
    """Load the default identifying UIDs from the bundled JSON file.

    The result is memoised with ``functools.lru_cache``, so the file is
    only read on the first call. A tuple is returned so that the cached
    value cannot be modified in place by a caller.

    Returns
    -------
    tuple
        The top-level items of the JSON document found at
        ``IDENTIFYING_UIDS_FILEPATH``.
    """
    # JSON text is UTF-8 by specification; be explicit rather than relying
    # on the platform's default text encoding.
    with open(IDENTIFYING_UIDS_FILEPATH, encoding="utf-8") as uid_file:
        identifying_uids = json.load(uid_file)

    return tuple(identifying_uids)
def get_default_identifying_uids():
    """Return the default identifying UIDs as a new list.

    A fresh list is built from the cached tuple on every call, so the
    caller may modify the result without affecting later calls.
    """
    cached_uids = _get_default_identifying_uids()
    return list(cached_uids)
@functools.lru_cache()
def _get_default_pseudonymisation_keywords():
    """Build the default set of keywords to pseudonymise.

    The result combines the default identifying keywords (minus any whose
    name ends in ``"Sequence"``) with the default identifying UIDs. It is
    memoised with ``functools.lru_cache`` and returned as a tuple; because
    it is produced from a set, the element order is unspecified.
    """
    # The preferred approach is to pseudonymise the contents of sequences
    # rather than operate on the sequence element itself. Dropping the
    # sequence keywords fixes issue #1034 for default usage.
    non_sequence_keywords = {
        keyword
        for keyword in get_default_identifying_keywords()
        if not keyword.endswith("Sequence")
    }
    uid_keywords = set(get_default_identifying_uids())

    return tuple(non_sequence_keywords.union(uid_keywords))
def get_default_pseudonymisation_keywords():
    """Return the default pseudonymisation keywords as a new list.

    The list is rebuilt from the cached tuple on every call, so the
    caller may modify the result without affecting later calls.
    """
    cached_keywords = _get_default_pseudonymisation_keywords()
    return list(cached_keywords)
def anonymise_with_pseudo_cli(args):
    """Pseudonymise the file or directory given on the command line.

    The pseudonymisation keywords and the pseudonymisation replacement
    strategy are always used; the remaining behaviour is taken from
    ``args``.

    Parameters
    ----------
    args
        Parsed command line arguments. The attributes read are
        ``input_path``, ``output_path``, ``delete_unknown_tags``,
        ``ignore_unknown_tags``, ``keywords_to_leave_unchanged``,
        ``delete_original_files``, ``preserve_filenames``,
        ``clear_values`` and ``keep_private_tags``.

    Raises
    ------
    FileNotFoundError
        If ``args.input_path`` is neither an existing file nor an
        existing directory.
    """
    # Tri-state flag: True deletes unknown tags, False ignores them and
    # None is passed through when neither option was requested.
    if args.delete_unknown_tags:
        handle_unknown_tags = True
    elif args.ignore_unknown_tags:
        handle_unknown_tags = False
    else:
        handle_unknown_tags = None

    # Any falsy value (None, empty list, ...) becomes an empty tuple.
    keywords_to_leave_unchanged = args.keywords_to_leave_unchanged or ()

    logging.info("Pseudonymisation called!")
    identifying_keywords_for_pseudo = get_default_pseudonymisation_keywords()
    logging.info("Using pseudonymisation keywords")
    replacement_strategy = strategy.pseudonymisation_dispatch
    logging.info("Using pseudonymisation strategy")

    if isfile(args.input_path):
        anonymise_file(
            dicom_filepath=args.input_path,
            output_filepath=args.output_path,
            delete_original_file=args.delete_original_files,
            anonymise_filename=not args.preserve_filenames,
            replace_values=not args.clear_values,
            keywords_to_leave_unchanged=keywords_to_leave_unchanged,
            delete_private_tags=not args.keep_private_tags,
            delete_unknown_tags=handle_unknown_tags,
            replacement_strategy=replacement_strategy,
            identifying_keywords=identifying_keywords_for_pseudo,
        )
    elif isdir(args.input_path):
        anonymise_directory(
            dicom_dirpath=args.input_path,
            output_dirpath=args.output_path,
            delete_original_files=args.delete_original_files,
            anonymise_filenames=not args.preserve_filenames,
            replace_values=not args.clear_values,
            keywords_to_leave_unchanged=keywords_to_leave_unchanged,
            delete_private_tags=not args.keep_private_tags,
            delete_unknown_tags=handle_unknown_tags,
            replacement_strategy=replacement_strategy,
            identifying_keywords=identifying_keywords_for_pseudo,
        )
    else:
        raise FileNotFoundError(
            "No file or directory was found at the supplied input path."
        )
def is_valid_strategy_for_keywords(
    identifying_keywords=None, replacement_strategy=None
):
    """Check that a replacement strategy covers the VR of every keyword.

    Parameters
    ----------
    identifying_keywords : iterable of str, optional
        Keywords to check. Defaults to the default pseudonymisation
        keywords.
    replacement_strategy : container, optional
        Object supporting ``in`` tests against a VR. Defaults to
        ``strategy.pseudonymisation_dispatch``.

    Returns
    -------
    bool
        ``True`` when the VR looked up for every keyword is contained in
        ``replacement_strategy`` (also when there are no keywords),
        otherwise ``False``.

    Raises
    ------
    KeyError
        If a keyword has no entry in the baseline keyword/VR dictionary.
    """
    if identifying_keywords is None:
        identifying_keywords = get_default_pseudonymisation_keywords()
    if replacement_strategy is None:
        replacement_strategy = strategy.pseudonymisation_dispatch

    vr_by_keyword = get_baseline_keyword_vr_dict()

    # all() stops at the first keyword whose VR the strategy cannot handle.
    return all(
        vr_by_keyword[keyword] in replacement_strategy
        for keyword in identifying_keywords
    )
def pseudonymise(dicom_input, output_path=None):
    """Convenient API to pseudonymisation.

    Elements whose tags are not in the pydicom dictionary will be deleted.
    PatientSex will not be modified/pseudonymised.
    For fine tuned control, use ``anonymise_dataset()`` instead.

    Parameters
    ----------
    dicom_input : ``pydicom.dataset.Dataset | str | pathlib.Path``
        Either a dataset, a path to a file or a path to a directory.
    output_path : ``str | pathlib.Path``, optional
        Only used when the input is a path. It is forwarded as
        ``output_filepath`` to ``anonymise_file`` when the input is a
        file, or as ``output_dirpath`` to ``anonymise_directory`` when
        the input is a directory, by default None.

    Returns
    -------
    ``pydicom.dataset.Dataset`` | ``str`` | ``list`` of ``str``
        If ``dicom_input`` was a dataset, the pseudonymised dataset.
        If ``dicom_input`` was a file, the path to the pseudonymised
        file as returned by ``anonymise_file``.
        If ``dicom_input`` was a directory, the list of pseudonymised
        files as returned by ``anonymise_directory``.

    Raises
    ------
    FileNotFoundError
        If ``dicom_input`` is not a dataset and is neither an existing
        directory nor an existing file.
    """
    replacement_strategy = strategy.pseudonymisation_dispatch
    identifying_keywords_for_pseudo = get_default_pseudonymisation_keywords()

    if not is_valid_strategy_for_keywords():
        logging.error("Pseudonymisation strategy is not valid for keywords")
        logging.error("Please submit issue to PyMedPhys")
        # but continue on, the data might not contain the offending keywords
        # and if it does... there will be some kind of error raised

    # A one-element list. ``list("PatientSex")`` would instead split the
    # string into single characters, so PatientSex would not be excluded.
    keywords_to_leave_unchanged = ["PatientSex"]

    if isinstance(dicom_input, pydicom.dataset.Dataset):
        return anonymise_dataset(
            dicom_input,
            keywords_to_leave_unchanged=keywords_to_leave_unchanged,
            delete_unknown_tags=True,
            replacement_strategy=replacement_strategy,
            identifying_keywords=identifying_keywords_for_pseudo,
        )

    input_path = pathlib.Path(dicom_input)

    if input_path.is_dir():
        return anonymise_directory(
            dicom_input,
            output_dirpath=output_path,
            keywords_to_leave_unchanged=keywords_to_leave_unchanged,
            delete_unknown_tags=True,
            replacement_strategy=replacement_strategy,
            identifying_keywords=identifying_keywords_for_pseudo,
        )

    if not input_path.is_file():
        raise FileNotFoundError(f"Unable to find {dicom_input}")

    return anonymise_file(
        dicom_input,
        output_filepath=output_path,
        keywords_to_leave_unchanged=keywords_to_leave_unchanged,
        delete_unknown_tags=True,
        replacement_strategy=replacement_strategy,
        identifying_keywords=identifying_keywords_for_pseudo,
    )