from functools import partial
import numpy as np
from sklearn.metrics import pairwise_kernels, pairwise_distances
from sklearn.preprocessing import KernelCenterer
from ._misc import get_random_state
from .qubo import qubo
[docs]class qubo_embedding:
[docs] def map_solution(self, x):
return NotImplemented
@property
def qubo(self):
return NotImplemented
@property
def data(self):
return NotImplemented
[docs] @classmethod
def random(cls, n: int, random_state=None):
return NotImplemented
[docs]class Kernel2MeansClustering(qubo_embedding):
"""Binary clustering based on kernel matrices.
Args:
data (numpy.ndarray): Data array of shape ``(n, m)``.
The QUBO instance will have size ``n`` (or ``n-1``, if ``unambiguous=True``).
kernel (str, optional): Kernel function. Defaults to ``'linear'``.
Can be any of `those <https://github.com/scikit-learn/scikit-learn/blob/7f9bad99d6e0a3e8ddf92a7e5561245224dab102/sklearn/metrics/pairwise.py#L2216>`__.
centered (bool, optional): If ``True``, center the Kernel matrix. Defaults to ``True``.
unambiguous (bool, optional): If ``True``, assign the last data point to cluster 0 and exclude it
from the optimization. Otherwise, the resulting QUBO instance would have two
symmetrical optimal cluster assignments. Defaults to False.
**kernel_params: Additional keyword arguments for the Kernel function, passed to ``sklearn.metrics.pairwise_kernels``.
"""
def __init__(self, data, kernel='linear', centered=True, unambiguous=False, **kernel_params):
# for different kernels: https://scikit-learn.org/stable/modules/metrics.html
self.__data = data
self.__kernel = kernel
self.__kernel_params = kernel_params
self.__centered = centered
self.__unambiguous = unambiguous
self.__Q = self.__from_data()
@property
def qubo(self):
return self.__Q
@property
def data(self):
return dict(points=self.__data)
def __from_data(self):
# calculate kernel matrix
K = pairwise_kernels(X=self.__data, metric=self.__kernel, **self.__kernel_params)
# center kernel matrix
if self.__centered:
K = KernelCenterer().fit_transform(K)
q = -K
np.fill_diagonal(q, K.sum(1) - K.diagonal())
# fix z_n=0 for cluster assignment
if self.__unambiguous:
n = K.shape[0]
q = q[:n-1, :n-1]
return qubo(q)
[docs] def map_solution(self, x):
# return cluster assignments (-1, +1)
return 2 * x - 1
[docs] @classmethod
def random(cls, n: int, dim=2, dist=2.0, random_state=None, kernel=None, centered=True,
unambiguous=True, **kernel_params):
if kernel is None:
kernel = 'linear'
kernel_params = {}
npr = get_random_state(random_state)
data = npr.normal(size=(n, dim))
mask = npr.permutation(n) < n // 2
data[mask, :] += dist / np.sqrt(dim)
data -= data.mean(0)
return data, cls(data, kernel=kernel, centered=centered, unambiguous=unambiguous,
**kernel_params)
[docs]class KMedoids(qubo_embedding):
"""
K-medoids vector quantization problem: Given a dataset, find k representatives.
For the derivation see:
Christian Bauckhage et al., "A QUBO Formulation of the k-Medoids Problem.”, LWDA, 2019.
Args:
data_set (numpy.ndarray): Data points.
distance_matrix (numpy.ndarray, optional): Pairwise distances between data points.
Defaults to ``Welsh``-distance.
k (int, optional): Number of representative points. Defaults to 2.
alpha: (float, optional): Parameter controlling far apartness of k
representatives. Defaults to 1 / k.
beta: (float, optional): Parameter controlling far centrality of k
representatives. Defaults to 1 / n.
gamma: (float, optional): Parameter controlling the enforcement of exactly k
representatives. Defaults to 2.
"""
def __init__(self, data_set=None, distance_matrix=None, k=2, alpha=None, beta=None, gamma=2):
if data_set is None and distance_matrix is None:
raise Exception('data_set or distance_matrix have to be given!')
elif distance_matrix is None:
distance_matrix = 1 - np.exp(- 0.5 * pairwise_distances(X=data_set,
metric='sqeuclidean'))
self.__data_set = data_set
# map distance matrix to [0, 1]
self.__distance_matrix = distance_matrix / distance_matrix.max()
self.__n = self.__distance_matrix.shape[0]
self.__k = k
if alpha is None:
alpha = 0.5 / self.__k
self.__alpha = alpha
if beta is None:
beta = 1.0 / self.__n
self.__beta = beta
self.__gamma = gamma
self.__Q = self.__from_data()
@property
def qubo(self):
return self.__Q
@property
def data(self):
return dict(points=self.__data_set,
distance_matrix=self.__distance_matrix)
def __from_data(self):
# Identification of far apart data points
far_apart_matrix = - self.__alpha * self.__distance_matrix
# Identification of central data points
ones_vector = np.ones(self.__n)
central_vector = self.__beta * self.__distance_matrix @ ones_vector
# Ensuring k representatives
ensuring_matrix = self.__gamma * np.ones((self.__n, self.__n))
ensuring_vector = - 2 * self.__gamma * self.__k * ones_vector
np.fill_diagonal(ensuring_matrix, np.diag(ensuring_matrix) + ensuring_vector)
# Putting different objectives together
matrix = far_apart_matrix + ensuring_matrix
np.fill_diagonal(matrix, np.diag(matrix) + central_vector)
return qubo(matrix)
[docs]class SubsetSum(qubo_embedding):
"""Subset Sum problem: Given a list of values, find
a subset that adds up to a given target value.
Args:
values (numpy.ndarray | list): Values of which to find a subset.
The resulting QUBO instance will have size ``len(values)``.
target (int | float): Target value which the subset must add up to.
"""
def __init__(self, values, target):
self.__values = np.asarray(values)
self.__target = target
self.__Q = self.__from_data(values, target)
@property
def qubo(self):
return self.__Q
@property
def data(self):
return dict(
values=self.__values,
target=self.__target)
def __from_data(self, values, target):
q = np.outer(values, values)
q[np.diag_indices_from(q)] -= 2 * target * values
q = np.triu(q + np.tril(q, -1).T)
return qubo(q)
[docs] def map_solution(self, x):
return self.__values[x.astype(bool)]
[docs] @classmethod
def random(cls, n: int, low=-100, high=100,
summands=None, random_state=None):
npr = get_random_state(random_state)
values = np.zeros(n)
while np.any(np.isclose(values, 0)):
values = npr.uniform(low, high, size=n).round(2)
k = round(npr.triangular(0.1*n, 0.5*n, 0.9*n)) if summands is None else summands
subset = npr.permutation(n) < k
target = values[subset].sum()
return cls(values, target)
[docs]class Max2Sat(qubo_embedding):
"""Maximum Satisfyability problem with clauses of size 2.
The problem is to find a variable assignment that maximizes
the number of true clauses.
Args:
clauses (list): A list of tuples containing literals,
representing a logical formula in CNF.
Each tuple must have exactly two elements.
The elements must be integers, representing the variable
indices **counting from 1**. Negative literals have a
negative sign. For instance, the formula :math:`(x_1\\vee \\overline{x_2})\\wedge(\\overline{x_1}\\vee x_3)`
becomes ``[(1,-2), (-1,3)]``.
penalty (float, optional): Penalty value for unsatisfied clauses.
Must be positive. Defaults to ``1.0``.
"""
def __init__(self, clauses, penalty=1.0):
assert all(len(c) == 2 for c in clauses), 'All clauses must consist of exactly 2 variables'
assert all(0 not in c for c in clauses), '"0" cannot be a variable, use indices >= 1'
self.__clauses = clauses
ix_set = set()
ix_set.update(*self.__clauses)
self.__indices = [i for i in sorted(ix_set) if i > 0]
assert penalty > 0.0, 'Penalty must be positive > 0'
self.__penalty = penalty
[docs] def map_solution(self, x):
return {i: x[self.__indices.find(i)] == 1 for i in self.__indices}
@property
def qubo(self):
n = max(max(c) for c in self.__clauses)
m = np.zeros((n, n))
ix_map = {i: qi for qi, i in enumerate(self.__indices)}
for xi, xj in map(partial(sorted, key=abs), self.__clauses):
i = ix_map(abs(xi))
j = ix_map(abs(xj))
if xi > 0:
if xj > 0:
m[i, i] += self.__penalty
m[j, j] += self.__penalty
m[i, j] -= self.__penalty
else:
m[j, j] += self.__penalty
m[i, j] -= self.__penalty
else:
if xj > 0:
m[i, i] += self.__penalty
m[i, j] -= self.__penalty
else:
m[i, j] += self.__penalty
return qubo(m)
@property
def data(self):
return self.__clauses
[docs] @classmethod
def random(cls, n: int, clauses=None, random_state=None):
npr = get_random_state(random_state)
if clauses is None:
clauses = int(1.5 * n)
# TODO
return NotImplemented
[docs]class KernelSVM(qubo_embedding):
"""Kernel Support Vector Machine learning: Given labeled numerical data,
and a kernel, determines the support vectors, which is a subset of the
data that lie on the margin of a separating hyperplane in the feature space
determined by the kernel. Note that this method makes some simplifying
assumptions detailed in `this paper <https://ceur-ws.org/Vol-2454/paper_51.pdf>`__.
Args:
X (numpy.ndarray): Input data (row-wise) of shape ``(N, d)``.
y (numpy.ndarray): Binary labels (-1 or 1) of shape ``(N,)``
C (float, optional): Hyperparameter controlling the penalization of
misclassified data points. Defaults to 1.
kernel (str, optional): Kernel function. Defaults to ``'linear'``.
Can be any of `those <https://github.com/scikit-learn/scikit-learn/blob/7f9bad99d6e0a3e8ddf92a7e5561245224dab102/sklearn/metrics/pairwise.py#L2216>`__.
**kernel_params: Additional keyword arguments for the Kernel function, passed to ``sklearn.metrics.pairwise_kernels``.
"""
def __init__(self, X, y, C=1.0, kernel='linear', **kernel_params):
self.__X = X
self.__y = y
self.__C = C
self.__kernel = kernel
self.__kernel_params = kernel_params
@property
def data(self):
return dict(X=self.__X, y=self.__y)
@property
def qubo(self):
K = pairwise_kernels(
X=self.__X,
metric=self.__kernel,
**self.__kernel_params)
m = 0.5*(self.__C**2)*K*np.outer(self.__y, self.__y)
m += np.tril(m, -1).T
m -= self.__C*np.eye(K.shape[0])
return qubo(np.triu(m))
[docs] def map_solution(self, x):
return np.where(x)[0]