Source code for qubolite.embedding

from functools import partial

import numpy as np
from sklearn.metrics import pairwise_kernels, pairwise_distances
from sklearn.preprocessing import KernelCenterer

from ._misc import get_random_state
from .qubo import qubo


class qubo_embedding:

    def map_solution(self, x):
        return NotImplemented

    @property
    def qubo(self):
        return NotImplemented

    @property
    def data(self):
        return NotImplemented

    @classmethod
    def random(cls, n: int, random_state=None):
        return NotImplemented


class Kernel2MeansClustering(qubo_embedding):
    """Binary clustering based on kernel matrices.

    Args:
        data (numpy.ndarray): Data array of shape ``(n, m)``. The QUBO instance
            will have size ``n`` (or ``n-1`` if ``unambiguous=True``).
        kernel (str, optional): Kernel function. Defaults to ``'linear'``.
            Can be any of `those <https://github.com/scikit-learn/scikit-learn/blob/7f9bad99d6e0a3e8ddf92a7e5561245224dab102/sklearn/metrics/pairwise.py#L2216>`__.
        centered (bool, optional): If ``True``, center the kernel matrix.
            Defaults to ``True``.
        unambiguous (bool, optional): If ``True``, assign the last data point to
            cluster 0 and exclude it from the optimization. Otherwise, the
            resulting QUBO instance has two symmetrical optimal cluster
            assignments. Defaults to ``False``.
        **kernel_params: Additional keyword arguments for the kernel function,
            passed to ``sklearn.metrics.pairwise_kernels``.
    """

    def __init__(self, data, kernel='linear', centered=True, unambiguous=False,
                 **kernel_params):
        # for different kernels: https://scikit-learn.org/stable/modules/metrics.html
        self.__data = data
        self.__kernel = kernel
        self.__kernel_params = kernel_params
        self.__centered = centered
        self.__unambiguous = unambiguous
        self.__Q = self.__from_data()

    @property
    def qubo(self):
        return self.__Q

    @property
    def data(self):
        return dict(points=self.__data)

    def __from_data(self):
        # calculate kernel matrix
        K = pairwise_kernels(X=self.__data, metric=self.__kernel,
                             **self.__kernel_params)
        # center kernel matrix
        if self.__centered:
            K = KernelCenterer().fit_transform(K)
        q = -K
        np.fill_diagonal(q, K.sum(1) - K.diagonal())
        # fix z_n = 0 for cluster assignment
        if self.__unambiguous:
            n = K.shape[0]
            q = q[:n-1, :n-1]
        return qubo(q)

    def map_solution(self, x):
        # return cluster assignments (-1, +1)
        return 2 * x - 1

    @classmethod
    def random(cls, n: int, dim=2, dist=2.0, random_state=None, kernel=None,
               centered=True, unambiguous=True, **kernel_params):
        if kernel is None:
            kernel = 'linear'
            kernel_params = {}
        npr = get_random_state(random_state)
        data = npr.normal(size=(n, dim))
        mask = npr.permutation(n) < n // 2
        data[mask, :] += dist / np.sqrt(dim)
        data -= data.mean(0)
        return data, cls(data, kernel=kernel, centered=centered,
                         unambiguous=unambiguous, **kernel_params)
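

# Illustrative usage sketch (not part of the original module): builds a small
# two-cluster dataset via `Kernel2MeansClustering.random` above and maps a
# candidate binary assignment to cluster labels. The function name
# `_example_kernel_2means` is hypothetical, and passing an integer seed assumes
# that `get_random_state` accepts one; only methods defined in this module are used.
def _example_kernel_2means():
    data, emb = Kernel2MeansClustering.random(8, dim=2, dist=3.0, random_state=42)
    Q = emb.qubo                            # QUBO of size 7 (random() defaults to unambiguous=True)
    x = np.array([1, 0, 1, 0, 1, 0, 1])     # some candidate assignment for the remaining 7 points
    labels = emb.map_solution(x)            # cluster assignments in {-1, +1}
    return data, Q, labels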


class KMedoids(qubo_embedding):
    """k-medoids vector quantization problem: Given a dataset, find k
    representatives. For the derivation see: Christian Bauckhage et al.,
    "A QUBO Formulation of the k-Medoids Problem", LWDA, 2019.

    Args:
        data_set (numpy.ndarray): Data points.
        distance_matrix (numpy.ndarray, optional): Pairwise distances between
            data points. Defaults to the Welsch distance
            ``1 - exp(-0.5 * ||x - y||^2)``.
        k (int, optional): Number of representative points. Defaults to 2.
        alpha (float, optional): Parameter controlling how far apart the k
            representatives are. Defaults to ``0.5 / k``.
        beta (float, optional): Parameter controlling how central the k
            representatives are. Defaults to ``1 / n``.
        gamma (float, optional): Parameter controlling the enforcement of
            exactly k representatives. Defaults to 2.
    """

    def __init__(self, data_set=None, distance_matrix=None, k=2,
                 alpha=None, beta=None, gamma=2):
        if data_set is None and distance_matrix is None:
            raise Exception('data_set or distance_matrix have to be given!')
        elif distance_matrix is None:
            distance_matrix = 1 - np.exp(
                -0.5 * pairwise_distances(X=data_set, metric='sqeuclidean'))
        self.__data_set = data_set
        # map distance matrix to [0, 1]
        self.__distance_matrix = distance_matrix / distance_matrix.max()
        self.__n = self.__distance_matrix.shape[0]
        self.__k = k
        if alpha is None:
            alpha = 0.5 / self.__k
        self.__alpha = alpha
        if beta is None:
            beta = 1.0 / self.__n
        self.__beta = beta
        self.__gamma = gamma
        self.__Q = self.__from_data()

    @property
    def qubo(self):
        return self.__Q

    @property
    def data(self):
        return dict(points=self.__data_set,
                    distance_matrix=self.__distance_matrix)

    def __from_data(self):
        # identification of far-apart data points
        far_apart_matrix = -self.__alpha * self.__distance_matrix
        # identification of central data points
        ones_vector = np.ones(self.__n)
        central_vector = self.__beta * self.__distance_matrix @ ones_vector
        # ensuring k representatives
        ensuring_matrix = self.__gamma * np.ones((self.__n, self.__n))
        ensuring_vector = -2 * self.__gamma * self.__k * ones_vector
        np.fill_diagonal(ensuring_matrix, np.diag(ensuring_matrix) + ensuring_vector)
        # putting the different objectives together
        matrix = far_apart_matrix + ensuring_matrix
        np.fill_diagonal(matrix, np.diag(matrix) + central_vector)
        return qubo(matrix)
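

# Illustrative usage sketch (not part of the original module): constructs a
# k-medoids embedding from random 2-d points and inspects the resulting QUBO
# and the normalized distance matrix. The function name `_example_kmedoids` is
# hypothetical; only methods defined in this module are used.
def _example_kmedoids():
    rng = np.random.default_rng(0)
    points = rng.normal(size=(10, 2))      # 10 random data points in 2 dimensions
    emb = KMedoids(data_set=points, k=3)   # choose 3 representatives
    Q = emb.qubo                           # QUBO instance of size 10
    D = emb.data['distance_matrix']        # pairwise distances scaled to [0, 1]
    return Q, D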


class SubsetSum(qubo_embedding):
    """Subset Sum problem: Given a list of values, find a subset that adds up
    to a given target value.

    Args:
        values (numpy.ndarray | list): Values of which to find a subset.
            The resulting QUBO instance will have size ``len(values)``.
        target (int | float): Target value which the subset must add up to.
    """

    def __init__(self, values, target):
        self.__values = np.asarray(values)
        self.__target = target
        # use the array version of `values` so that list input works as documented
        self.__Q = self.__from_data(self.__values, self.__target)

    @property
    def qubo(self):
        return self.__Q

    @property
    def data(self):
        return dict(
            values=self.__values,
            target=self.__target)

    def __from_data(self, values, target):
        q = np.outer(values, values)
        q[np.diag_indices_from(q)] -= 2 * target * values
        q = np.triu(q + np.tril(q, -1).T)
        return qubo(q)

    def map_solution(self, x):
        return self.__values[x.astype(bool)]

    @classmethod
    def random(cls, n: int, low=-100, high=100, summands=None, random_state=None):
        npr = get_random_state(random_state)
        values = np.zeros(n)
        while np.any(np.isclose(values, 0)):
            values = npr.uniform(low, high, size=n).round(2)
        k = round(npr.triangular(0.1*n, 0.5*n, 0.9*n)) if summands is None else summands
        subset = npr.permutation(n) < k
        target = values[subset].sum()
        return cls(values, target)
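

# Illustrative usage sketch (not part of the original module): encodes a small
# subset-sum instance and decodes a known feasible assignment. The function
# name `_example_subset_sum` is hypothetical; only methods defined in this
# module are used.
def _example_subset_sum():
    emb = SubsetSum(values=[2.0, 5.0, 7.0, 11.0], target=12.0)
    Q = emb.qubo                    # QUBO instance of size 4
    x = np.array([0, 1, 1, 0])      # selects 5.0 and 7.0, which sum to the target
    subset = emb.map_solution(x)    # -> array([5., 7.])
    assert np.isclose(subset.sum(), emb.data['target'])
    return Q, subset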


class Max2Sat(qubo_embedding):
    """Maximum Satisfiability problem with clauses of size 2. The problem is to
    find a variable assignment that maximizes the number of true clauses.

    Args:
        clauses (list): A list of tuples containing literals, representing a
            logical formula in CNF. Each tuple must have exactly two elements.
            The elements must be integers, representing the variable indices
            **counting from 1**. Negative literals have a negative sign. For
            instance, the formula
            :math:`(x_1\\vee \\overline{x_2})\\wedge(\\overline{x_1}\\vee x_3)`
            becomes ``[(1,-2), (-1,3)]``.
        penalty (float, optional): Penalty value for unsatisfied clauses.
            Must be positive. Defaults to ``1.0``.
    """

    def __init__(self, clauses, penalty=1.0):
        assert all(len(c) == 2 for c in clauses), \
            'All clauses must consist of exactly 2 variables'
        assert all(0 not in c for c in clauses), \
            '"0" cannot be a variable, use indices >= 1'
        self.__clauses = clauses
        ix_set = set()
        ix_set.update(*self.__clauses)
        # collect all variable indices, regardless of the literals' signs
        self.__indices = sorted({abs(i) for i in ix_set})
        assert penalty > 0.0, 'Penalty must be > 0'
        self.__penalty = penalty

    def map_solution(self, x):
        return {i: x[self.__indices.index(i)] == 1 for i in self.__indices}

    @property
    def qubo(self):
        n = len(self.__indices)
        m = np.zeros((n, n))
        ix_map = {i: qi for qi, i in enumerate(self.__indices)}
        for xi, xj in map(partial(sorted, key=abs), self.__clauses):
            i = ix_map[abs(xi)]
            j = ix_map[abs(xj)]
            if xi > 0:
                if xj > 0:
                    # clause (x_i or x_j): penalize x_i = x_j = 0
                    m[i, i] -= self.__penalty
                    m[j, j] -= self.__penalty
                    m[i, j] += self.__penalty
                else:
                    # clause (x_i or not x_j): penalize x_i = 0, x_j = 1
                    m[j, j] += self.__penalty
                    m[i, j] -= self.__penalty
            else:
                if xj > 0:
                    # clause (not x_i or x_j): penalize x_i = 1, x_j = 0
                    m[i, i] += self.__penalty
                    m[i, j] -= self.__penalty
                else:
                    # clause (not x_i or not x_j): penalize x_i = x_j = 1
                    m[i, j] += self.__penalty
        return qubo(m)

    @property
    def data(self):
        return self.__clauses

    @classmethod
    def random(cls, n: int, clauses=None, random_state=None):
        npr = get_random_state(random_state)
        if clauses is None:
            clauses = int(1.5 * n)
        # TODO
        return NotImplemented
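

# Illustrative usage sketch (not part of the original module): encodes the
# formula (x1 or not x2) and (not x1 or x3) from the docstring above and maps a
# satisfying assignment back to the variables. The function name
# `_example_max2sat` is hypothetical; only methods defined in this module are used.
def _example_max2sat():
    emb = Max2Sat([(1, -2), (-1, 3)], penalty=1.0)
    Q = emb.qubo                        # QUBO over the variables x1, x2, x3
    x = np.array([1, 0, 1])             # x1=1, x2=0, x3=1 satisfies both clauses
    assignment = emb.map_solution(x)    # -> {1: True, 2: False, 3: True}
    return Q, assignment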


class KernelSVM(qubo_embedding):
    """Kernel Support Vector Machine learning: Given labeled numerical data and
    a kernel, find the support vectors, i.e., the subset of the data points
    that lie on the margin of a separating hyperplane in the feature space
    determined by the kernel. Note that this method makes some simplifying
    assumptions detailed in `this paper <https://ceur-ws.org/Vol-2454/paper_51.pdf>`__.

    Args:
        X (numpy.ndarray): Input data (row-wise) of shape ``(N, d)``.
        y (numpy.ndarray): Binary labels (-1 or 1) of shape ``(N,)``.
        C (float, optional): Hyperparameter controlling the penalization of
            misclassified data points. Defaults to 1.
        kernel (str, optional): Kernel function. Defaults to ``'linear'``.
            Can be any of `those <https://github.com/scikit-learn/scikit-learn/blob/7f9bad99d6e0a3e8ddf92a7e5561245224dab102/sklearn/metrics/pairwise.py#L2216>`__.
        **kernel_params: Additional keyword arguments for the kernel function,
            passed to ``sklearn.metrics.pairwise_kernels``.
    """

    def __init__(self, X, y, C=1.0, kernel='linear', **kernel_params):
        self.__X = X
        self.__y = y
        self.__C = C
        self.__kernel = kernel
        self.__kernel_params = kernel_params

    @property
    def data(self):
        return dict(X=self.__X, y=self.__y)

    @property
    def qubo(self):
        K = pairwise_kernels(
            X=self.__X, metric=self.__kernel, **self.__kernel_params)
        m = 0.5 * (self.__C ** 2) * K * np.outer(self.__y, self.__y)
        m += np.tril(m, -1).T
        m -= self.__C * np.eye(K.shape[0])
        return qubo(np.triu(m))

    def map_solution(self, x):
        return np.where(x)[0]
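

# Illustrative usage sketch (not part of the original module): builds the
# support-vector selection QUBO for a tiny labeled dataset with an RBF kernel
# and decodes a candidate selection. The function name `_example_kernel_svm` is
# hypothetical; only methods defined in this module are used.
def _example_kernel_svm():
    rng = np.random.default_rng(1)
    X = rng.normal(size=(6, 2))
    y = np.array([-1, -1, -1, 1, 1, 1])        # binary labels in {-1, +1}
    emb = KernelSVM(X, y, C=2.0, kernel='rbf', gamma=0.5)
    Q = emb.qubo                               # QUBO instance of size 6
    x = np.array([1, 0, 0, 1, 0, 0])           # hypothetical selection of support vectors
    support_indices = emb.map_solution(x)      # -> array([0, 3])
    return Q, support_indices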