Source code for kalmus.utils.measure_utils

""" Image Comparison Utility """

import Bio.pairwise2 as sequence_align
import numpy as np
from skimage.color import rgb2hsv
from skimage.metrics import mean_squared_error, structural_similarity


def nrmse_similarity(image_1, image_2, norm_mode="Min max"):
    """
    Similarity score based on the normalized root mean squared error (NRMSE).

    :param image_1: The image 1 for comparison
    :type image_1: numpy.ndarray
    :param image_2: The image 2 for comparison
    :type image_2: numpy.ndarray
    :param norm_mode: The mode for the normalization. "Average norm" divides the RMSE by \
    max(RMS(image_1), RMS(image_2)). "Min max" divides the RMSE by \
    max(value range of image_1, value range of image_2)
    :type norm_mode: str
    :return: 1 - NRMSE. 1 means the two images are identical, smaller values mean less similar. \
    If the images differ but the normalization term is zero (e.g. two different constant images in \
    "Min max" mode) the result is not finite.
    :rtype: float
    :raises ValueError: If the two images do not have the same shape or norm_mode is not one of \
    "Average norm" / "Min max"
    """
    if image_1.shape != image_2.shape:
        # Without this check numpy would silently broadcast mismatched images
        raise ValueError("Input images must have the same dimensions.")

    image_1 = image_1.astype("float64")
    image_2 = image_2.astype("float64")

    if norm_mode == "Average norm":
        image_1_avg_norm = np.sqrt(np.mean(image_1 * image_1))
        image_2_avg_norm = np.sqrt(np.mean(image_2 * image_2))
        denom = max(image_1_avg_norm, image_2_avg_norm)
    elif norm_mode == "Min max":
        image_1_min_max = image_1.max() - image_1.min()
        # The value range of image 2 must be measured against image 2's own minimum
        image_2_min_max = image_2.max() - image_2.min()
        denom = max(image_1_min_max, image_2_min_max)
    else:
        raise ValueError("Unknown norm_mode {!r}, expected 'Average norm' or 'Min max'".format(norm_mode))

    # Root mean squared error over every element of the two images
    rmse = np.sqrt(np.mean((image_1 - image_2) ** 2))
    if rmse == 0:
        # Identical images are a perfect match even when the normalization term is 0 (avoids 0 / 0 = nan)
        return 1.0

    return 1 - rmse / denom
def ssim_similarity(image_1, image_2, window_size=None):
    """
    Structural similarity index measure (SSIM) between two images, rescaled to [0, 1].

    :param image_1: The image 1 for comparison
    :type image_1: numpy.ndarray
    :param image_2: The image 2 for comparison
    :type image_2: numpy.ndarray
    :param window_size: Side length of the local window. Must be odd and smaller than the first two \
    dimensions of the images. None is passed through to the underlying SSIM implementation unchanged
    :type window_size: int
    :return: The structural similarity score mapped from [-1, 1] to [0, 1]. \
    0 is the least similar, 1 is the most similar (same)
    :rtype: float
    """
    assert image_1.shape == image_2.shape, "The shape of two images used for computing structural similarity must " \
                                           "be the same."
    num_dims = len(image_1.shape)
    assert num_dims >= 2, "The image must be a 2D image (single channel greyscale image or multi-channel" \
                          "color image)"
    if window_size is not None:
        smallest_side = min(image_1.shape[0], image_1.shape[1])
        assert window_size % 2 == 1 and window_size < smallest_side, \
            "The size of the local window must be an odd number and smaller than the size of input images"

    first = image_1.astype("float64")
    second = image_2.astype("float64")

    # Anything with more than two dimensions is treated as a multi-channel image
    # NOTE(review): newer scikit-image releases replace ``multichannel`` with ``channel_axis`` -
    # confirm against the version this project pins
    raw_score = structural_similarity(first, second, win_size=window_size, multichannel=num_dims > 2)

    # Map the [-1, 1] score onto the [0, 1] range
    return (raw_score + 1) / 2
def get_resample_index(num_frames, sample_amount=10):
    """
    Helper function
    Get the resample indexes based on the number of frames in sequences and the amount of samples
    we want to extract. The indexes are equally spaced between the first frame (0) and the last
    frame (num_frames - 1), rounded to the nearest integer.

    :param num_frames: The total number of frames
    :type num_frames: int
    :param sample_amount: How many frames that you want to sample from them. Must be at least 1 and \
    no larger than num_frames. When it is 1, only index 0 is returned
    :type sample_amount: int
    :return: int64 array of indexes that are equally spaced from 0. The size of the array == sample_amount
    :rtype: numpy.ndarray
    """
    assert sample_amount >= 1, "The number of samples must be at least 1"
    assert num_frames >= sample_amount, "The number of frames must be greater than or equal to " \
                                        "the number of samples"

    if sample_amount == 1:
        # A single sample has no spacing to compute (the step would be a division by zero)
        return np.zeros(1, dtype="int64")

    # linspace yields exactly sample_amount points from 0 to num_frames - 1 inclusive
    possible_index = np.linspace(0, num_frames - 1, num=sample_amount)
    nearest_int_index = np.round(possible_index)
    # Pin the final index to the last frame regardless of floating point rounding
    nearest_int_index[-1] = num_frames - 1

    return nearest_int_index.astype("int64")
def cross_correlation(signal_template, signal_source):
    """
    Signal matching. Normalized cross correlation of two input signals.
    Signals need to be in the same shape. The inputs are not modified.

    For multi-dimensional signals the mean is removed separately for every position of the last axis
    (averaging over all the other axes). For 1 dimensional signals the mean of the whole signal is removed.

    :param signal_template: The template signal
    :type signal_template: numpy.ndarray
    :param signal_source: The source signal
    :type signal_source: numpy.ndarray
    :return: The cross correlation between two input signals. High cross correlation means high similarity \
    between two input signals. range in [-1, 1]. The result is nan when either mean-removed signal is all zeros
    :rtype: float
    """
    assert signal_template.shape == signal_source.shape, "The shape of two input signals/color barcodes must " \
                                                         "have the same shapes."

    # astype returns new arrays, so the in-place subtraction below never touches the caller's data
    template = signal_template.astype("float64")
    source = signal_source.astype("float64")

    # Average over every axis but the last. A 1D signal has no leading axes; an empty axis tuple would
    # make np.mean return the signal itself (zeroing it out), so fall back to the mean over all elements
    mean_axes = tuple(range(len(signal_template.shape) - 1)) or None

    template -= np.mean(signal_template, axis=mean_axes)
    source -= np.mean(signal_source, axis=mean_axes)

    nom = np.sum(template * source)
    denom = np.sqrt(np.sum(template * template)) * np.sqrt(np.sum(source * source))

    return nom / denom
def local_cross_correlation(signal_template, signal_source, horizontal_interval=40, vertical_interval=40):
    """
    Local cross correlation between two input signals.
    Both signals are split into the same grid of windows, the mean of every window is removed
    (per channel when trailing axes exist), and the normalized correlation of the results is returned.
    The input signals need to be at least 2 dimensional for local windowing and are not modified.

    :param signal_template: The template signal
    :type signal_template: numpy.ndarray
    :param signal_source: The source signal
    :type signal_source: numpy.ndarray
    :param horizontal_interval: Number of horizontal intervals (window width == signal width // horizontal intervals, \
    with a minimum of 1 when that division yields 0)
    :type horizontal_interval: int
    :param vertical_interval: Number of vertical intervals (window height == signal height // vertical intervals, \
    with a minimum of 1 when that division yields 0)
    :type vertical_interval: int
    :return: The local cross correlation between two signals. Higher local cross correlation means higher \
    similarity between two signals. range in [-1, 1]
    :rtype: float
    """
    assert signal_source.shape == signal_template.shape, "Incompatiable shape between source and template signals"
    assert len(signal_source.shape) >= 2, "local cross correlation requires the input signals to be 2 dimensional"

    # A window size of 0 (more intervals than rows/columns) is bumped up to a single row/column
    window_height = signal_template.shape[0] // vertical_interval or 1
    window_width = signal_template.shape[1] // horizontal_interval or 1

    # Work on float copies so the caller's arrays stay untouched
    centered_template = signal_template.copy().astype("float64")
    centered_source = signal_source.copy().astype("float64")

    for top in range(0, centered_template.shape[0], window_height):
        rows = slice(top, top + window_height)
        for left in range(0, centered_template.shape[1], window_width):
            cols = slice(left, left + window_width)
            for signal in (centered_template, centered_source):
                # Basic slicing gives a view, so the in-place subtraction updates the full signal
                window = signal[rows, cols, ...]
                window -= np.mean(window, axis=(0, 1))

    numerator = np.sum(centered_template * centered_source)
    template_energy = np.sqrt(np.sum(centered_template ** 2))
    source_energy = np.sqrt(np.sum(centered_source ** 2))

    return numerator / (template_energy * source_energy)
def generate_hue_strings_from_color_barcode(color_barcode, num_interval=12):
    """
    Helper function
    Turn an RGB color barcode into a string with one character per color, where the character encodes
    which hue interval the color falls into.

    The hue (in degrees) is shifted by 15 and wrapped back into [0, 360) before it is divided into
    intervals of 360 / num_interval degrees. Interval numbers 0-9 are written as digits; larger numbers
    are written as letters, with 10 mapped to "b", 11 to "c" and so on.

    :param color_barcode: Input color barcode, the input barcode must be a 1 dimensional color barcode with \
    ``kalmus.barcodes.ColorBarcode.colors`` three channels (R, G, B). shape == [number of colors, 3]
    :type color_barcode: numpy.ndarray
    :param num_interval: The number of intervals that will be divided in the Hue ring (0 to 360 degree)
    :type num_interval: int
    :return: The string where each character represent the hue interval of the colors in the input RGB barcode
    :rtype: str
    """
    assert len(color_barcode.shape) == 2 and color_barcode.shape[-1] == 3, "The input color barcode must be a " \
                                                                           "2D array of 3-chanel RGB colors"

    # rgb2hsv is fed an image-shaped array, so go through a (N, 1, 3) view and flatten back
    hsv_colors = rgb2hsv(color_barcode.reshape(-1, 1, 3)).reshape(-1, 3)

    hue_degrees = hsv_colors[..., 0] * 360
    # NOTE(review): 15 is half of the default 30 degree interval - presumably meant to center the
    # intervals; it is not rescaled for other num_interval values
    hue_degrees += 15
    hue_degrees[hue_degrees >= 360] -= 360

    hue_degrees /= 360 / num_interval
    interval_codes = hue_degrees.astype("uint16")

    return "".join(
        str(code) if code <= 9 else chr(ord("a") + (code - 9))
        for code in interval_codes
    )
def generate_brightness_string_from_brightness_barcode(brightness_barcode, num_interval=15):
    """
    Helper function
    Turn a brightness barcode into a string with one character per brightness value, where the character
    encodes which brightness interval the value falls into.

    Each interval is 255 / num_interval wide. Interval numbers 0-9 are written as digits; larger numbers
    are written as letters, with 10 mapped to "b", 11 to "c" and so on.

    :param brightness_barcode: Input 1 dimensional brightness barcode with 1 channel. \
    ``kalmus.barcodes.Barcode.BrightnessBarcode.brightness`` \
    shape == [number of brightness, 1]
    :type brightness_barcode: numpy.ndarray
    :param num_interval: The number of intervals that will be divided in the brightness range [0, 255]
    :type num_interval: int
    :return: The string where each character represents the brightness interval of the brightness in the input
    :rtype: str
    """
    assert len(brightness_barcode.shape) == 2 and brightness_barcode.shape[-1] == 1, \
        "The input brightness barcode must be a 2D array with last channel to be 1"

    # Floor division by the interval width gives the interval number of every brightness value
    interval_codes = np.floor_divide(brightness_barcode[:, 0], 255 / num_interval).astype("uint16")

    return "".join(
        str(code) if code <= 9 else chr(ord("a") + (code - 9))
        for code in interval_codes
    )
def compare_needleman_wunsch(str_barcode_1, str_barcode_2, local_sequence_size=2000,
                             match_score=2, mismatch_penal=-1, gap_penal=-0.5, extending_gap_penal=-0.1,
                             normalized=False):
    """
    Score how well two string barcodes match using global alignment (Needleman Wunsch).
    The barcodes are cut into consecutive chunks of local_sequence_size characters, every pair of chunks
    is aligned on its own, and the chunk scores are added up.
    Needleman Wunsch: https://www.sciencedirect.com/science/article/abs/pii/0022283670900574?via%3Dihub

    :param str_barcode_1: The input string representation of barcode 1
    :type str_barcode_1: str
    :param str_barcode_2: The input string representation of barcode 2 (same length as barcode 1)
    :type str_barcode_2: str
    :param local_sequence_size: Divide the long barcode into several small barcode with local_sequence_size length
    :type local_sequence_size: int
    :param match_score: The score (bonus) for correctly matching character
    :type match_score: int
    :param mismatch_penal: The penalty for mismatch character
    :type mismatch_penal: int
    :param gap_penal: The penalty for gaps within matched sequence
    :type gap_penal: float
    :param extending_gap_penal: The penalty for extending gaps
    :type extending_gap_penal: float
    :param normalized: If True, divide the summed score by (barcode length * match_score). \
    If False, return the raw summed score
    :type normalized: bool
    :return: The match score/normalized match score
    :rtype: float
    """
    assert len(str_barcode_1) == len(str_barcode_2), "The lengths of two barcodes have to be identical"

    barcode_length = len(str_barcode_1)
    total_score = sum(
        sequence_align.align.globalms(str_barcode_1[begin:begin + local_sequence_size],
                                      str_barcode_2[begin:begin + local_sequence_size],
                                      match_score, mismatch_penal, gap_penal, extending_gap_penal,
                                      score_only=True)
        for begin in range(0, barcode_length, local_sequence_size)
    )

    # The best possible score is a match on every character
    divisor = barcode_length * match_score if normalized else 1
    return total_score / divisor
def compare_smith_waterman(str_barcode_1, str_barcode_2, local_sequence_size=2000,
                           match_score=2, mismatch_penal=-1, gap_penal=-0.5, extending_gap_penal=-0.1,
                           normalized=False):
    """
    Score how well two string barcodes match using local alignment (Smith Waterman).
    The barcodes are cut into consecutive chunks of local_sequence_size characters, every pair of chunks
    is aligned on its own, and the chunk scores are added up.
    Smith Waterman: https://www.sciencedirect.com/science/article/abs/pii/0022283681900875?via%3Dihub

    :param str_barcode_1: The input string representation of barcode 1
    :type str_barcode_1: str
    :param str_barcode_2: The input string representation of barcode 2 (same length as barcode 1)
    :type str_barcode_2: str
    :param local_sequence_size: Divide the long barcode into several small barcode with local_sequence_size length
    :type local_sequence_size: int
    :param match_score: The score (bonus) for correctly matching character
    :type match_score: int
    :param mismatch_penal: The penalty for mismatch character
    :type mismatch_penal: int
    :param gap_penal: The penalty for gaps within matched sequence
    :type gap_penal: float
    :param extending_gap_penal: The penalty for extending gaps
    :type extending_gap_penal: float
    :param normalized: If True, divide the summed score by (barcode length * match_score). \
    If False, return the raw summed score.
    :type normalized: bool
    :return: The match score/normalized match score
    :rtype: float
    """
    assert len(str_barcode_1) == len(str_barcode_2), "The lengths of two barcodes have to be identical"

    barcode_length = len(str_barcode_1)
    total_score = sum(
        sequence_align.align.localms(str_barcode_1[begin:begin + local_sequence_size],
                                     str_barcode_2[begin:begin + local_sequence_size],
                                     match_score, mismatch_penal, gap_penal, extending_gap_penal,
                                     score_only=True)
        for begin in range(0, barcode_length, local_sequence_size)
    )

    # The best possible score is a match on every character
    divisor = barcode_length * match_score if normalized else 1
    return total_score / divisor