Source code for PlanetAlign.algorithms.regal

import torch
import numpy as np
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj, degree
import time
import psutil
import os

from PlanetAlign.data import Dataset
from PlanetAlign.metrics import hits_ks_scores, mrr_score
from PlanetAlign.utils import get_anchor_pairs

from .base_model import BaseModel


[docs] class REGAL(BaseModel): r"""Embedding-based method REGAL for unsupervised pairwise attributed network alignment. REGAL is proposed by the paper: "`REGAL: Representation Learning-based Graph Alignment. <https://dl.acm.org/doi/10.1145/3269206.3271788>`_" in CIKM 2018. Parameters ---------- k : int, optional Hyperparameter for tuning the number of landmarks. Default is 10. num_layers : int, optional Number of layers for the neighborhood when generating structural embeddings. Default is 2. alpha : float, optional Hyperparameter for the decay factor of the structural embedding. Default is 0.01. gammastruc : float, optional Weight for the structural similarity. Default is 1. gammaattr : float, optional Weight for the attribute similarity. Default is 1. buckets : int, optional Number of buckets for the structural embedding learning. Default is 2. dtype : torch.dtype, optional Data type of the tensors, choose from torch.float32 or torch.float64. Default is torch.float32. """ def __init__(self, k: int = 10, num_layers: int = 2, alpha: float = 0.01, gammastruc: float = 1, gammaattr: float = 1, buckets: int = 2, dtype: torch.dtype = torch.float32): super(REGAL, self).__init__(dtype=dtype) assert isinstance(k, int) and k > 0, 'k must be a positive integer' assert isinstance(num_layers, int) and num_layers > 0, 'Number of layers must be a positive integer' assert 0 < alpha < 1, 'Alpha must be in the range (0, 1)' assert gammastruc >= 0, 'Gamma for structural similarity must be non-negative' assert gammaattr >= 0, 'Gamma for attribute similarity must be non-negative' assert isinstance(buckets, int) and buckets > 1, 'Buckets must be an integer greater than 1' self.k = k self.num_layers = num_layers self.alpha = alpha self.gammastruc = gammastruc self.gammaattr = gammaattr self.buckets = buckets
[docs] def train(self, dataset: Dataset, gid1: int, gid2: int, use_attr: bool = True, save_log: bool = True, verbose: bool = True): """ Parameters ---------- dataset : Dataset The dataset containing the graphs to be aligned and the training/test data. gid1 : int The index of the first graph in the dataset to be aligned. gid2 : int The index of the second graph in the dataset to be aligned. use_attr : bool, optional Whether to use node and edge attributes for alignment. Default is True. save_log : bool, optional Whether to save the log of the training process. Default is True. verbose : bool, optional Whether to print the progress during training. Default is True. """ self.check_inputs(dataset, (gid1, gid2), plain_method=False, use_attr=use_attr, pairwise=True, supervised=False) logger = self.init_training_logger(dataset, use_attr, additional_headers=['memory', 'infer_time'], save_log=save_log) process = psutil.Process(os.getpid()) graph1, graph2 = dataset.pyg_graphs[gid1], dataset.pyg_graphs[gid2] anchor_links = get_anchor_pairs(dataset.train_data, gid1, gid2) test_pairs = get_anchor_pairs(dataset.test_data, gid1, gid2) t0 = time.time() emb1, emb2, loss = self.get_xnetmf_embedding(graph1, graph2, anchor_links, use_attr, verbose) t1 = time.time() # Evaluation S = emb1 @ emb2.T hits = hits_ks_scores(S, test_pairs, mode='mean') mrr = mrr_score(S, test_pairs, mode='mean') mem_gb = process.memory_info().rss / 1024 ** 3 logger.log(epoch=1, loss=loss, epoch_time=t1-t0, hits=hits, mrr=mrr, memory=round(mem_gb, 4), infer_time=round(t1-t0, 4), verbose=verbose) return emb1, emb2, logger
def get_xnetmf_embedding(self, graph1, graph2, anchor_links, use_attr, verbose=True): degree1 = degree(graph1.edge_index[0], num_nodes=graph1.num_nodes) degree2 = degree(graph2.edge_index[0], num_nodes=graph2.num_nodes) max_degree = max(degree1.max().item(), degree2.max().item()) if verbose: print('Generating structural embeddings...', end=' ') struct_emb1 = self.get_structural_embedding(graph1, max_degree) struct_emb2 = self.get_structural_embedding(graph2, max_degree) if verbose: print('Done') if verbose: print('Generating xNetMF embeddings...', end=' ') n1, n2 = graph1.num_nodes, graph2.num_nodes num_landmarks = min(int(self.k * np.log(n1 + n2) / np.log(2)), n1 + n2) sampled_landmarks = torch.randperm(n1 + n2)[:num_landmarks] # Merge embeddings of the two graphs for effective similarity computation struct_emb = torch.vstack([struct_emb1, struct_emb2]) landmarks_struct_emb = struct_emb[sampled_landmarks] struct_dist = torch.norm(struct_emb[:, None, :] - landmarks_struct_emb[None, :, :], dim=2) num_anchors = anchor_links.shape[0] anchor_emb1 = torch.empty((n1, 0), dtype=self.dtype) anchor_emb2 = torch.empty((n2, 0), dtype=self.dtype) if num_anchors > 0: anchor_emb1 = torch.zeros((n1, num_anchors), dtype=self.dtype) anchor_emb2 = torch.zeros((n2, num_anchors), dtype=self.dtype) anchor_emb1[anchor_links[:, 0], torch.arange(num_anchors)] = 1 anchor_emb2[anchor_links[:, 1], torch.arange(num_anchors)] = 1 attr_emb1 = torch.empty((n1, 0), dtype=self.dtype) attr_emb2 = torch.empty((n2, 0), dtype=self.dtype) if use_attr and graph1.x is not None and graph2.x is not None: attr_emb1 = graph1.x attr_emb2 = graph2.x input_emb1 = torch.hstack([anchor_emb1, attr_emb1]) input_emb2 = torch.hstack([anchor_emb2, attr_emb2]) if input_emb1.shape[1] == 0 or input_emb2.shape[1] == 0: attribute_dist = 0 else: input_emb = torch.vstack([input_emb1, input_emb2]) landmarks_attr_emb = input_emb[sampled_landmarks] attribute_dist = (input_emb[:, None, :] != landmarks_attr_emb[None, :, :]).to(self.dtype).sum(dim=2) C = torch.exp(-(self.gammastruc * struct_dist + self.gammaattr * attribute_dist)) W_pinv = torch.pinverse(C[sampled_landmarks]) U, X, V = torch.svd(W_pinv) Wfac = U @ torch.diag(torch.sqrt(X)) xnetmf_emb = C @ Wfac xnetmf_emb = F.normalize(xnetmf_emb, p=2, dim=1) if verbose: print('Done') W_reconstructed = U @ torch.diag(X) @ V.T svd_recon_loss = F.mse_loss(W_reconstructed, W_pinv) return xnetmf_emb[:n1], xnetmf_emb[n1:], svd_recon_loss.item() def get_structural_embedding(self, graph, max_degree): degrees = degree(graph.edge_index[0], num_nodes=graph.num_nodes) assert degrees.max().item() <= max_degree, 'Max degree is less than the maximum degree in the graph' neighborhood = self.get_k_hop_neighbors(graph, self.num_layers) num_features = int(np.log(max_degree) / np.log(self.buckets)) + 1 structural_embedding = torch.zeros(graph.num_nodes, num_features) for n in range(graph.num_nodes): for layer in neighborhood[n].keys(): neighbors = neighborhood[n][layer] if len(neighbors) > 0: # get degree sequence neighbors_degrees = degrees[neighbors] filtered_neighbors_degrees = neighbors_degrees[neighbors_degrees > 0] index_vec = (torch.log(filtered_neighbors_degrees) / np.log(self.buckets)).to(torch.int) structural_embedding[n, :] += torch.bincount(index_vec, minlength=num_features) * (self.alpha ** layer) return structural_embedding @staticmethod def get_k_hop_neighbors(graph, k): adj = to_dense_adj(graph.edge_index, max_num_nodes=graph.num_nodes).squeeze().to(torch.bool) neighborhoods = {node: {0: torch.tensor([node])} for node in range(graph.num_nodes)} for node in range(graph.num_nodes): last_neighbor_vec = torch.zeros(graph.num_nodes, dtype=torch.bool) last_neighbor_vec[node] = True for layer in range(1, k + 1): neighbor_vec = (adj[neighborhoods[node][layer - 1]].sum(dim=0) > 0) & ~last_neighbor_vec if neighbor_vec.sum() == 0: break neighborhoods[node][layer] = torch.where(neighbor_vec)[0] last_neighbor_vec |= neighbor_vec return neighborhoods