from typing import Union, Optional
from pathlib import Path
import os
import torch
from PlanetAlign.data import Dataset
from .utils import download_file_from_google_drive
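# NOTE(review): stray "[docs]" marker (looks like a Sphinx viewcode link artifact) commented out; as a bare statement it evaluates the name `docs`, which this module never defines.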
class PPI(Dataset):
    """A pair of networks synthesized from the protein-protein interaction (PPI) network.

    The PPI network is proposed by the paper
    `"Predicting Multicellular Function through Multi-layer Tissue Networks" <https://arxiv.org/abs/1707.04638>`_,
    where nodes represent human proteins and edges represent physical interactions between proteins in a human
    cell. The immunological signatures are included as node features. The two networks are noisy permutations
    of the original network generated by randomly inserting 10% edges (PPI1) and deleting 15% edges (PPI2)
    from the original network, respectively. There are in total 3,980 common nodes across the two networks.

    .. list-table::
        :widths: 10 10 10 10 10
        :header-rows: 1

        * - Graph
          - #nodes
          - #edges
          - #node attrs
          - #edge attrs
        * - PPI1
          - 3,980
          - 42,612
          - 50
          - 0
        * - PPI2
          - 3,980
          - 32,929
          - 50
          - 0

    Args:
        root (str or Path): Directory in which ``PPI.pt`` is looked up (and into which it is
            downloaded when ``download`` is set).
        download (bool, optional): If ``True``, ``download_file_from_google_drive`` is called to
            fetch ``PPI.pt`` into ``root`` before the integrity check. (default: ``False``)
        train_ratio (float, optional): Forwarded unchanged to the ``Dataset`` constructor;
            presumably the fraction of anchor links used for training -- confirm against
            ``Dataset``. (default: ``0.2``)
        dtype (torch.dtype, optional): Forwarded unchanged to the ``Dataset`` constructor.
            (default: ``torch.float32``)
        seed (int, optional): Forwarded unchanged to the ``Dataset`` constructor; presumably the
            random seed of the train/test split -- confirm against ``Dataset``. (default: ``0``)

    Raises:
        RuntimeError: If ``PPI.pt`` is not present as a regular file under ``root`` after the
            optional download step.
    """

    def __init__(self,
                 root: Union[str, Path],
                 download: Optional[bool] = False,
                 train_ratio: Optional[float] = 0.2,
                 dtype: torch.dtype = torch.float32,
                 seed: Optional[int] = 0):
        if download:
            download_file_from_google_drive(
                remote_file_id='1gD0F3KJQP8oYTQqWiRmceOWbpjNj-WHy',
                save_filename='PPI.pt',
                root=root)

        # Validate before handing over to the base class so that a missing file surfaces as a
        # clear, actionable error. _check_integrity reads no instance state, so calling it
        # before super().__init__() is safe.
        if not self._check_integrity(root):
            raise RuntimeError('PPI dataset not found or corrupted. You can use download=True to download it')

        super().__init__(root=root, name='PPI', train_ratio=train_ratio, dtype=dtype, seed=seed)

    def _check_integrity(self, root):
        """Return ``True`` if ``PPI.pt`` exists as a regular file directly under ``root``.

        Only the presence of the file is verified; its contents are not inspected.
        """
        # isfile (rather than exists) rejects a directory or other non-regular entry that merely
        # shares the expected name, so the caller's RuntimeError fires at this check.
        return os.path.isfile(os.path.join(root, 'PPI.pt'))