Coverage for src/causalspyne/data_gen.py: 100%
27 statements
« prev ^ index » next coverage.py v7.11.0, created at 2026-05-15 16:30 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2026-05-15 16:30 +0000
1"""
2generate Linear Gaussian
3"""
5import numpy as np
7from causalspyne.noise_idiosyncratic import Idiosyncratic
8from causalspyne.edge_models import EdgeModelLinear
9from causalspyne.utils_random import coerce_rng
class DataGen:
    """Sample observational data column by column along a DAG.

    Each node's column is an idiosyncratic noise draw, plus a contribution
    computed by the edge model from the columns of the node's parents.
    """

    def __init__(self, dag, edge_model=None,
                 dft_noise: str = "Gaussian",
                 dict_params: dict | None = None,
                 idiosynchratic: dict[int, Idiosyncratic] | None = None,
                 rng=None):
        """
        Store the DAG and set up the noise sources and the edge model.

        Parameters:
        - dag: graph object; `gen` calls its `topological_sort()` and
          `get_list_parents_inds(node)` methods.
        - edge_model: object with a `run(node, parent_data)` method. When
          None, `EdgeModelLinear(dag)` is built instead.
        - dft_noise: str, forwarded as `class_name` to the default
          `Idiosyncratic` noise source.
        - dict_params: dict or None, forwarded as `dict_params` to the
          default noise source (None becomes an empty dict).
        - idiosynchratic: dict or None, per-node noise sources keyed by node
          index; they take precedence over the default source in `gen`.
          (The keyword keeps its historical spelling for compatibility.)
        - rng: passed through `coerce_rng(rng, seed=0)`; the result is handed
          to the default noise source only.
        """
        generator = coerce_rng(rng, seed=0)
        self.dag = dag
        # Per-node overrides; a falsy argument means "no overrides".
        self.dict_idiosyncratic = idiosynchratic if idiosynchratic else {}
        noise_params = dict_params if dict_params else {}
        # Fallback noise source for every node without an override.
        self.idiosyncratic = Idiosyncratic(class_name=dft_noise,
                                           rng=generator,
                                           dict_params=noise_params)
        if edge_model is None:
            self.edge_model = EdgeModelLinear(self.dag)
        else:
            self.edge_model = edge_model

    def gen(self, num_samples):
        """
        Generate data for every node of the DAG in topological order.

        Parameters:
        - num_samples: int, number of samples (rows) to generate.

        Returns:
        - data: np.ndarray, generated data of shape (num_samples, num_nodes).
        """
        topo_order = self.dag.topological_sort()

        # One column per node; the node index is used directly as the column
        # index (assumes indices are valid positions in 0..num_nodes-1).
        samples = np.zeros((num_samples, len(topo_order)))

        # Parents precede children in topological order, so parent columns
        # are already filled when a child is processed.
        for node in topo_order:
            parent_cols = self.dag.get_list_parents_inds(node)
            # A node-specific noise source wins over the default one.
            source = self.dict_idiosyncratic.get(node, self.idiosyncratic)
            samples[:, node] = source.gen(num_samples)
            if not parent_cols:
                # Root node: the noise draw is the whole signal.
                continue
            # Add the parents' contribution on top of the noise.
            samples[:, node] += self.edge_model.run(node,
                                                    samples[:, parent_cols])
        return samples