Source code for domainlab.models.model_diva

"""
DIVA
"""
import torch
from torch.nn import functional as F

from domainlab import g_inst_component_loss_agg, g_str_cross_entropy_agg
from domainlab.models.model_vae_xyd_classif import VAEXYDClassif
from domainlab.utils.utils_class import store_args


def mk_diva(parent_class=VAEXYDClassif, **kwargs):
    """
    Instantiate a domain invariant variational autoencoder (DIVA) with arbitrary task loss.

    Details:
        This method creates a generative model based on a variational autoencoder,
        which can reconstruct the input images. For this purpose, three separate
        encoders with latent variables are trained, each representing a latent
        subspace for the domain, class, and residual feature information,
        respectively. The latent subspaces serve to disentangle the respective
        sources of variation. To reconstruct the input image, the three latent
        variables are fed into a decoder. Additionally, two classifiers are
        trained, which predict the domain and the class label.
        For more details, see:
        Ilse, Maximilian, et al. "DIVA: Domain invariant variational autoencoders."
        Medical Imaging with Deep Learning. PMLR, 2020.

    Args:
        parent_class: Class object determining the task type. Defaults to VAEXYDClassif.

    Returns:
        ModelDIVA: model inheriting from parent class.

    Input Parameters:
        zd_dim: size of latent space for domain-specific information
        zy_dim: size of latent space for class-specific information
        zx_dim: size of latent space for residual variance
        chain_node_builder: creates the neural network specified by the user;
            object of the class "VAEChainNodeGetter"
            (see domainlab/compos/vae/utils_request_chain_builder.py),
            initialized with a user request
        list_str_y: list of labels
        list_d_tr: list of training domains
        gamma_d: weighting term for the domain classifier
        gamma_y: weighting term for the class classifier
        beta_d: weighting term for the domain encoder
        beta_x: weighting term for the residual variation encoder
        beta_y: weighting term for the class encoder

    Usage:
        For a concrete example, see:
        https://github.com/marrlab/DomainLab/blob/master/tests/test_mk_exp_diva.py
    """

    class ModelDIVA(parent_class):
        """
        DIVA
        """

        @store_args
        def __init__(
            self,
            chain_node_builder,
            zd_dim,
            zy_dim,
            zx_dim,
            list_d_tr,
            gamma_d,
            gamma_y,
            beta_d,
            beta_x,
            beta_y,
            multiplier_recon=1.0,
        ):
            """
            gamma: classification loss coefficient
            """
            super().__init__(chain_node_builder, zd_dim, zy_dim, zx_dim, **kwargs)
            self.list_d_tr = list_d_tr
            self.dim_d_tr = len(self.list_d_tr)
            if self.zd_dim > 0:
                # conditional prior p(z_d | d) and auxiliary domain classifier
                # operating on the domain latent z_d
                self.add_module(
                    "net_p_zd",
                    self.chain_node_builder.construct_cond_prior(
                        self.dim_d_tr, self.zd_dim
                    ),
                )
                self.add_module(
                    "net_classif_d",
                    self.chain_node_builder.construct_classifier(
                        self.zd_dim, self.dim_d_tr
                    ),
                )
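
        # The two methods below implement the hyperparameter scheduling
        # protocol (e.g. beta warm-up): hyper_init registers this model's
        # beta values under keys prefixed with self.name, and hyper_update
        # reads the same keys back from the scheduler each epoch, so the KL
        # weights can be annealed without a direct reference to the model.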

        def hyper_update(self, epoch, fun_scheduler):
            """
            update the beta hyperparameters for the current epoch

            :param epoch: current epoch
            :param fun_scheduler: scheduler object queried for the scheduled values
            """
            dict_rst = fun_scheduler(epoch)
            self.beta_d = dict_rst[self.name + "_beta_d"]
            self.beta_y = dict_rst[self.name + "_beta_y"]
            self.beta_x = dict_rst[self.name + "_beta_x"]

        def hyper_init(self, functor_scheduler):
            """
            instantiate a scheduler object from its class and the
            hyperparameters of this model

            :param functor_scheduler: the class name of the scheduler
            """
            parameters = {}
            parameters[self.name + "_beta_d"] = self.beta_d
            parameters[self.name + "_beta_y"] = self.beta_y
            parameters[self.name + "_beta_x"] = self.beta_x
            return functor_scheduler(trainer=None, **parameters)

        def _cal_reg_loss(self, tensor_x, tensor_y, tensor_d, others=None):
            """
            compute the regularization terms of the DIVA loss: the
            reconstruction loss, the per-instance log-density gaps between
            priors and posteriors of the three latent subspaces, and the
            auxiliary domain-classification loss, each paired with its
            multiplier
            """
            q_zd, zd_q, q_zx, zx_q, q_zy, zy_q = self.encoder(tensor_x)
            logit_d = self.net_classif_d(zd_q)

            batch_size = zd_q.shape[0]
            device = zd_q.device

            p_zx = self.init_p_zx4batch(batch_size, device)
            p_zy = self.net_p_zy(tensor_y)
            p_zd = self.net_p_zd(tensor_d)

            z_concat = self.decoder.concat_ydx(zy_q, zd_q, zx_q)
            loss_recon_x, _, _ = self.decoder(z_concat, tensor_x)

            # without aggregation, the log-density gap has shape
            # [batch_size, zd_dim]
            zd_p_minus_zd_q = g_inst_component_loss_agg(
                p_zd.log_prob(zd_q) - q_zd.log_prob(zd_q), 1
            )

            zx_p_minus_zx_q = torch.zeros_like(zd_p_minus_zd_q)
            if self.zx_dim > 0:
                # torch.sum returns 0 for an empty tensor,
                # while torch.mean would return nan
                zx_p_minus_zx_q = g_inst_component_loss_agg(
                    p_zx.log_prob(zx_q) - q_zx.log_prob(zx_q), 1
                )

            zy_p_minus_zy_q = g_inst_component_loss_agg(
                p_zy.log_prob(zy_q) - q_zy.log_prob(zy_q), 1
            )

            _, d_target = tensor_d.max(dim=1)
            lc_d = F.cross_entropy(logit_d, d_target, reduction=g_str_cross_entropy_agg)

            return [
                loss_recon_x,
                zd_p_minus_zd_q,
                zx_p_minus_zx_q,
                zy_p_minus_zy_q,
                lc_d,
            ], [
                self.multiplier_recon,
                -self.beta_d,
                -self.beta_x,
                -self.beta_y,
                self.gamma_d,
            ]

    return ModelDIVA
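

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module). The factory is called
# twice: once with the task labels, which are captured as **kwargs and
# forwarded to the parent class, and once to build the concrete model.
# ``node_builder`` is a placeholder for a VAEChainNodeGetter instance (see
# domainlab/compos/vae/utils_request_chain_builder.py); the dimensions and
# weighting terms below are arbitrary example values, not recommendations.
#
#   model_cls = mk_diva(list_str_y=["class0", "class1"])
#   model = model_cls(
#       chain_node_builder=node_builder,
#       zd_dim=32,
#       zy_dim=32,
#       zx_dim=8,
#       list_d_tr=["domain1", "domain2"],
#       gamma_d=1e5,
#       gamma_y=1e5,
#       beta_d=1.0,
#       beta_x=1.0,
#       beta_y=1.0,
#   )
#
# For a verified end-to-end example, including trainer and task wiring, see:
# https://github.com/marrlab/DomainLab/blob/master/tests/test_mk_exp_diva.py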