# Source code for PlanetAlign.datasets.ppi

from typing import Union, Optional
from pathlib import Path
import os
import torch

from PlanetAlign.data import Dataset
from .utils import download_file_from_google_drive


class PPI(Dataset):
    """A pair of networks synthesized from the protein-protein interaction (PPI) network.

    The PPI network is proposed by the paper
    `"Predicting Multicellular Function through Multi-layer Tissue Networks"
    <https://arxiv.org/abs/1707.04638>`_, where nodes represent human proteins
    and edges represent physical interaction between proteins in a human cell.
    The immunological signatures are included as node features. The two
    networks are noisy permutations of the original network generated by
    randomly inserting 10% edges (PPI1) and deleting 15% edges (PPI2) from the
    original network, respectively. There are in total 3,980 common nodes
    across two networks.

    .. list-table::
        :widths: 10 10 10 10 10
        :header-rows: 1

        * - Graph
          - #nodes
          - #edges
          - #node attrs
          - #edge attrs
        * - PPI1
          - 3,980
          - 42,612
          - 50
          - 0
        * - PPI2
          - 3,980
          - 32,929
          - 50
          - 0

    Args:
        root: Directory expected to contain ``PPI.pt``; also passed to the
            downloader as the save location when ``download`` is true.
        download: When true, fetch ``PPI.pt`` from Google Drive into ``root``
            before the presence check runs.
        train_ratio: Forwarded unchanged to the base ``Dataset`` (presumably
            the share of data used for training -- confirm in ``Dataset``).
        dtype: Forwarded unchanged to the base ``Dataset``.
        seed: Forwarded unchanged to the base ``Dataset``.

    Raises:
        RuntimeError: If ``PPI.pt`` does not exist under ``root`` once the
            optional download step has finished.
    """

    def __init__(self,
                 root: Union[str, Path],
                 download: Optional[bool] = False,
                 train_ratio: Optional[float] = 0.2,
                 dtype: torch.dtype = torch.float32,
                 seed: Optional[int] = 0):
        # Fetch first, so the presence check below also covers a fresh download.
        if download:
            download_file_from_google_drive(
                root=root,
                save_filename='PPI.pt',
                remote_file_id='1gD0F3KJQP8oYTQqWiRmceOWbpjNj-WHy',
            )

        dataset_present = self._check_integrity(root)
        if not dataset_present:
            raise RuntimeError('PPI dataset not found or corrupted. You can use download=True to download it')

        # Everything else is delegated to the base class initializer.
        super().__init__(root=root, name='PPI', train_ratio=train_ratio, dtype=dtype, seed=seed)

    def _check_integrity(self, root):
        """Return whether ``PPI.pt`` exists under ``root`` (existence only, contents are not inspected)."""
        archive_path = os.path.join(root, 'PPI.pt')
        return os.path.exists(archive_path)