# Source code for PlanetAlign.utils.noise

from typing import Union, Optional, List, Tuple

import numpy as np
import torch
from torch_geometric.utils import to_undirected
from torch_geometric.data import Data

from PlanetAlign.data import Dataset


def perturb_edges(graph: Data,
                  noise_ratio: float,
                  seed: Optional[int] = None) -> torch.Tensor:
    """
    Add structural noise by randomly toggling edges of a PyG graph.

    Every column of ``graph.edge_index`` is stored as an unordered node pair
    (self-loops are dropped). Then ``int(graph.num_edges * noise_ratio)``
    random pairs of distinct nodes are drawn: a drawn pair that is currently
    an edge is removed, otherwise it is added. Pairs are drawn with
    replacement, so drawing the same pair twice cancels out.

    Parameters
    ----------
    graph : PyG graph
        The input graph to perturb. It is not modified.
    noise_ratio : float
        The ratio of edges to perturb.
    seed : int, optional
        Random seed for reproducibility. When given, a private NumPy
        generator is used and the global NumPy random state is left
        untouched; when omitted, the global NumPy random state is used.

    Returns
    -------
    torch.Tensor
        The perturbed edge index of the graph, made undirected with
        ``to_undirected``. An empty ``(2, 0)`` tensor is returned when no
        edge is left.

    Raises
    ------
    ValueError
        If at least one perturbation is requested but the graph has fewer
        than two nodes, so no pair of distinct nodes can be drawn.
    """

    num_perturb_edges = int(graph.num_edges * noise_ratio)
    num_nodes = graph.num_nodes

    # Without this guard a single-node graph would spin forever below,
    # because every draw yields u == v.
    if num_perturb_edges > 0 and num_nodes < 2:
        raise ValueError(
            "Cannot perturb edges of a graph with fewer than two nodes."
        )

    # Canonical (small, large) pairs; bulk ``tolist`` avoids one ``.item()``
    # round-trip per endpoint.
    sources = graph.edge_index[0].tolist()
    targets = graph.edge_index[1].tolist()
    edge_set = {
        (u, v) if u < v else (v, u)
        for u, v in zip(sources, targets)
        if u != v
    }

    # ``RandomState(seed)`` yields the same stream as ``np.random.seed(seed)``
    # followed by the module-level functions, but never touches the global
    # state, so nothing has to be restored even if an exception is raised.
    rng = np.random if seed is None else np.random.RandomState(seed)

    cnt = 0
    while cnt < num_perturb_edges:
        u, v = rng.randint(0, num_nodes), rng.randint(0, num_nodes)
        if u == v:
            continue  # self-loops are never sampled and do not count

        pair = (u, v) if u < v else (v, u)
        if pair in edge_set:
            edge_set.remove(pair)
        else:
            edge_set.add(pair)
        cnt += 1

    # An empty list would become a 1-D tensor, which is not a valid edge index.
    if not edge_set:
        return torch.empty((2, 0), dtype=torch.int64)

    # Convert edge_set back to edge_index
    new_edge_index = torch.tensor(list(edge_set), dtype=torch.int64).t()
    return to_undirected(new_edge_index)


def add_edge_noises(dataset: Dataset,
                    noise_ratio: float,
                    gids: Optional[Union[int, List[int], Tuple[int, ...]]] = None,
                    seed: Optional[int] = None,
                    inplace: bool = False) -> Dataset:
    """
    Add structural noise to graphs in a PlanetAlign dataset by perturbing edges.

    Parameters
    ----------
    dataset : PyG dataset
        The input dataset containing graphs.
    noise_ratio : float
        The ratio of edges to perturb in each graph.
    gids : int, list of int, or tuple of int
        The graph IDs to perturb. If None, all graphs will be perturbed.
        Repeated IDs are perturbed only once.
    seed : int, optional
        Random seed for reproducibility. The same seed is passed to
        ``perturb_edges`` for every selected graph.
    inplace : bool, optional
        If True, modify the dataset in place. Otherwise, return a new dataset.

    Returns
    -------
    PyG dataset
        The dataset with perturbed edges.

    Raises
    ------
    AssertionError
        If ``noise_ratio`` is outside ``[0, 1]`` or a graph ID is out of range.
    TypeError
        If ``gids`` is not None, an int, a list, or a tuple.
    """
    assert 0 <= noise_ratio <= 1, "Noise ratio must be between 0 and 1."

    num_graphs = len(dataset.pyg_graphs)
    if gids is None:
        gids = list(range(num_graphs))
    elif isinstance(gids, int):
        gids = [gids]
    elif isinstance(gids, (list, tuple)):
        gids = list(gids)
    else:
        raise TypeError("gids must be an int, list of int, or tuple of int.")
    assert all(0 <= gid < num_graphs for gid in gids), "Invalid graph IDs."

    # Drop repeated IDs (keeping first-seen order): perturbing the same graph
    # twice would apply more noise than requested, and with a fixed seed the
    # second pass re-draws the same node pairs and largely undoes the first.
    gids = list(dict.fromkeys(gids))

    if not inplace:
        dataset = dataset.clone()

    for gid in gids:
        graph = dataset.pyg_graphs[gid]
        edge_index = perturb_edges(graph, noise_ratio, seed)
        dataset.pyg_graphs[gid].edge_index = edge_index

    return dataset
def flip_attributes(graph: Data,
                    noise_ratio: float,
                    seed: Optional[int] = None) -> torch.Tensor:
    """
    Add attribute noise by flipping node attributes in a PyG graph.

    For every node, ``int(num_attrs * noise_ratio)`` distinct attribute
    positions are drawn and their binary values are flipped (0 <-> 1).

    Parameters
    ----------
    graph : PyG graph
        The input graph to perturb. ``graph.x`` is not modified.
    noise_ratio : float
        The ratio of attributes to flip.
    seed : int, optional
        Random seed for reproducibility. When given, a private NumPy
        generator is used and the global NumPy random state is left
        untouched; when omitted, the global NumPy random state is used.

    Returns
    -------
    torch.Tensor
        The perturbed node attributes of the graph.

    Raises
    ------
    AssertionError
        If ``graph.x`` contains values other than 0 and 1.
    """

    def is_binary_tensor(tensor: torch.Tensor) -> bool:
        """
        Check if a PyTorch tensor contains only binary values (0 and 1).

        Parameters
        ----------
        tensor : torch.Tensor
            The input tensor to check.

        Returns
        -------
        bool
            True if tensor contains only 0 and 1, False otherwise.
        """
        unique_vals = torch.unique(tensor)
        return torch.all((unique_vals == 0) | (unique_vals == 1)).item()

    assert is_binary_tensor(graph.x), "Node attributes must be binary (0 and 1)."

    num_nodes, num_attrs = graph.x.size()
    num_flip_attrs = int(num_attrs * noise_ratio)

    # ``RandomState(seed)`` yields the same stream as ``np.random.seed(seed)``
    # followed by ``np.random.choice``, but leaves the global state alone, so
    # an exception inside the loop cannot leave the global RNG reseeded.
    rng = np.random if seed is None else np.random.RandomState(seed)

    flipped_x = torch.clone(graph.x)
    for idx in range(num_nodes):
        perturbed_attr = rng.choice(num_attrs, num_flip_attrs, replace=False)
        flipped_x[idx, perturbed_attr] = 1 - flipped_x[idx, perturbed_attr]

    return flipped_x


def perturb_attributes_gaussian(graph: Data,
                                std: float,
                                seed: Optional[int] = None) -> torch.Tensor:
    """
    Add Gaussian noise to node attributes in a PyG graph.

    Each attribute column is standardised with its own mean and standard
    deviation, zero-mean Gaussian noise scaled by ``std`` is added, and the
    result is mapped back to the original scale. The noise applied to a
    column is therefore proportional to that column's standard deviation.

    Parameters
    ----------
    graph : PyG graph
        The input graph to perturb. ``graph.x`` is not modified.
    std : float
        Standard deviation of the Gaussian noise.
    seed : int, optional
        Random seed for reproducibility.

    Returns
    -------
    torch.Tensor
        The perturbed node attributes of the graph.
    """
    x = graph.x
    mean = x.mean(dim=0, keepdim=True)
    if x.size(0) > 1:
        spread = x.std(dim=0, keepdim=True)
    else:
        # The sample standard deviation of a single row is NaN and would turn
        # the whole output into NaN; treat the spread as zero instead.
        spread = torch.zeros_like(mean)
    std_dev = spread + 1e-12  # epsilon keeps constant columns from dividing by zero
    x_norm = (x - mean) / std_dev

    rng_state = None
    if seed is not None:
        # NOTE(review): per the PyTorch docs ``torch.manual_seed`` seeds every
        # device while ``get_rng_state``/``set_rng_state`` cover the CPU
        # generator only -- confirm whether CUDA state should be preserved too.
        rng_state = torch.get_rng_state()
        torch.manual_seed(seed)
    try:
        noise = torch.randn_like(x_norm) * std
    finally:
        # Restore the caller's RNG state even if sampling fails.
        if seed is not None:
            torch.set_rng_state(rng_state)

    x_noisy = (x_norm + noise) * std_dev + mean
    return x_noisy
def add_attr_noises(dataset: Dataset,
                    mode: str,
                    noise_ratio: float,
                    gids: Optional[Union[int, List[int], Tuple[int, ...]]] = None,
                    seed: Optional[int] = None,
                    inplace: bool = False) -> Dataset:
    """
    Add attribute noise to graphs in a PlanetAlign dataset by perturbing node attributes.

    Parameters
    ----------
    dataset : PyG dataset
        The input dataset containing graphs.
    mode: str
        The mode of noise to add. Options are 'flip' or 'gaussian'.
    noise_ratio : float
        In 'flip' mode, the ratio of attributes to flip in each graph. In
        'gaussian' mode, the value is passed to
        ``perturb_attributes_gaussian`` as the noise standard deviation.
    gids : int, list of int, or tuple of int
        The graph IDs to perturb. If None, all graphs will be perturbed.
        Repeated IDs are perturbed only once.
    seed : int, optional
        Random seed for reproducibility. The same seed is passed on for
        every selected graph.
    inplace : bool, optional
        If True, modify the dataset in place. Otherwise, return a new dataset.

    Returns
    -------
    PyG dataset
        The dataset with perturbed attributes.

    Raises
    ------
    AssertionError
        If ``noise_ratio`` is outside ``[0, 1]`` or a graph ID is out of range.
    TypeError
        If ``gids`` is not None, an int, a list, or a tuple.
    ValueError
        If ``mode`` is neither 'flip' nor 'gaussian'.
    """
    assert 0 <= noise_ratio <= 1, "Noise ratio must be between 0 and 1."

    num_graphs = len(dataset.pyg_graphs)
    if gids is None:
        gids = list(range(num_graphs))
    elif isinstance(gids, int):
        gids = [gids]
    elif isinstance(gids, (list, tuple)):
        gids = list(gids)
    else:
        raise TypeError("gids must be an int, list of int, or tuple of int.")
    assert all(0 <= gid < num_graphs for gid in gids), "Invalid graph IDs."

    # Reject an unknown mode up front: before paying for a dataset clone, and
    # also when there happens to be no graph to perturb.
    if mode not in ('flip', 'gaussian'):
        raise ValueError("Invalid mode. Choose either 'flip' or 'gaussian'.")

    # Drop repeated IDs (keeping first-seen order): with a fixed seed, flipping
    # the same graph twice re-draws the same positions and flips them back.
    gids = list(dict.fromkeys(gids))

    if not inplace:
        dataset = dataset.clone()

    for gid in gids:
        graph = dataset.pyg_graphs[gid]
        if mode == 'flip':
            x = flip_attributes(graph, noise_ratio, seed)
        else:
            x = perturb_attributes_gaussian(graph, noise_ratio, seed)
        dataset.pyg_graphs[gid].x = x

    return dataset
def perturb_supervision(dataset: Dataset,
                        noise_ratio: float,
                        src_gid: int = 0,
                        dst_gid: int = 1,
                        seed: Optional[int] = None) -> torch.Tensor:
    """
    Add supervision noise to PlanetAlign dataset object

    A random ``int(len(train_data) * noise_ratio)`` subset of the training
    anchor rows is selected, and in each selected row the entry in column
    ``dst_gid`` is replaced by a different node of the destination graph.
    Replacement nodes are drawn from the destination nodes that do not
    appear in column ``dst_gid`` of ``dataset.test_data``.

    Parameters
    ----------
    dataset : Dataset
        The input dataset containing graphs. It is not modified.
    noise_ratio: float
        The ratio of supervision to perturb.
    src_gid : int, optional
        The graph ID of the source graph. Default is 0.
    dst_gid : int, optional
        The graph ID of the destination graph. Default is 1.
    seed : int, optional
        Random seed for reproducibility. When given, a private torch
        generator is used and the global torch random state is left
        untouched; when omitted, the global torch generator is used.

    Returns
    -------
    torch.Tensor
        A copy of ``dataset.train_data`` with the selected anchors replaced.

    Raises
    ------
    AssertionError
        If ``noise_ratio`` is outside ``[0, 1]``, a graph ID is out of
        range, or ``src_gid`` equals ``dst_gid``.
    RuntimeError
        If an anchor has to be replaced but no other candidate destination
        node is available.
    """
    assert 0 <= noise_ratio <= 1, "Noise ratio must be between 0 and 1."
    assert src_gid < len(dataset.pyg_graphs), f"Source graph ID {src_gid} is out of range."
    assert dst_gid < len(dataset.pyg_graphs), f"Destination graph ID {dst_gid} is out of range."
    assert src_gid != dst_gid, "Source and destination graph IDs must be different."

    # A private generator seeded with ``seed`` replaces reseeding the global
    # one: nothing has to be restored afterwards (not even on error) and no
    # other device's RNG is affected. ``None`` selects the global generator.
    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    # Candidate replacements: destination nodes not used by any test anchor.
    dst_test_nodes = torch.unique(dataset.test_data[:, dst_gid])
    dst_nodes = torch.arange(dataset.pyg_graphs[dst_gid].num_nodes)
    candidate_noisy_dst_anchors = dst_nodes[~torch.isin(dst_nodes, dst_test_nodes)]
    num_candidates = len(candidate_noisy_dst_anchors)

    train_data = dataset.train_data
    noisy_train_data = train_data.clone()
    num_noisy_src_anchors = int(len(train_data) * noise_ratio)
    noisy_src_anchors_idx = torch.randperm(
        len(train_data), generator=generator
    )[:num_noisy_src_anchors]

    for noisy_src_anchor_idx in noisy_src_anchors_idx:
        dst_anchor = train_data[noisy_src_anchor_idx, dst_gid]

        # Candidates are distinct node IDs, so a replacement exists unless the
        # pool is empty or its only member is the anchor itself. Without this
        # check the rejection loop below would never terminate.
        if num_candidates == 0 or (
            num_candidates == 1 and candidate_noisy_dst_anchors[0] == dst_anchor
        ):
            raise RuntimeError(
                "No candidate destination node is available to replace a training anchor."
            )

        # Rejection sampling: redraw until the node differs from the true anchor.
        noisy_anchor = dst_anchor
        while noisy_anchor == dst_anchor:
            pick = torch.randint(0, num_candidates, (1,), generator=generator).item()
            noisy_anchor = candidate_noisy_dst_anchors[pick]
        noisy_train_data[noisy_src_anchor_idx, dst_gid] = noisy_anchor

    return noisy_train_data
def add_sup_noises(dataset: Dataset,
                   noise_ratio: float,
                   src_gid: int = 0,
                   dst_gid: int = 1,
                   seed: Optional[int] = None,
                   inplace: bool = False) -> Dataset:
    """
    Add supervision noise to graphs in a PlanetAlign dataset by injecting noisy anchors.

    The dataset's ``train_data`` is replaced by the output of
    ``perturb_supervision``; argument validation is done there.

    Parameters
    ----------
    dataset : Dataset
        The input dataset containing graphs.
    noise_ratio: float
        The ratio of supervision to perturb.
    src_gid : int, optional
        The graph ID of the source graph. Default is 0.
    dst_gid : int, optional
        The graph ID of the destination graph. Default is 1.
    seed : int, optional
        Random seed for reproducibility.
    inplace : bool, optional
        If True, modify the dataset in place. Otherwise, return a new dataset.

    Returns
    -------
    PyG dataset
        The dataset with perturbed supervision.
    """
    # Work on a copy unless the caller asked for in-place modification.
    if not inplace:
        dataset = dataset.clone()

    dataset.train_data = perturb_supervision(dataset, noise_ratio, src_gid, dst_gid, seed)

    return dataset