Coverage for src/causalspyne/data_gen.py: 100%

27 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2026-05-15 16:30 +0000

1""" 

2generate Linear Gaussian 

3""" 

4 

5import numpy as np 

6 

7from causalspyne.noise_idiosyncratic import Idiosyncratic 

8from causalspyne.edge_models import EdgeModelLinear 

9from causalspyne.utils_random import coerce_rng 

10 

11 

class DataGen:
    """Sample observational data column-by-column from a DAG.

    Each node's column is an idiosyncratic noise draw; when the node has
    parents, the contribution computed by the edge model from the parent
    columns is added on top.
    """

    def __init__(
        self,
        dag,
        edge_model=None,
        dft_noise: str = "Gaussian",
        dict_params: dict | None = None,
        idiosynchratic: dict[int, Idiosyncratic] | None = None,
        rng=None,
    ):
        """Store the DAG and set up the noise sources and the edge model.

        Parameters:
        - dag: graph object; ``gen`` calls its ``topological_sort()`` and
          ``get_list_parents_inds(node)`` methods.
        - edge_model: object exposing ``run(node, parent_data)``; when
          ``None`` an ``EdgeModelLinear`` built from ``dag`` is used.
        - dft_noise: passed as ``class_name`` to the default
          ``Idiosyncratic`` noise source.
        - dict_params: passed as ``dict_params`` to the default noise
          source (an empty dict when omitted or empty).
        - idiosynchratic: per-node noise sources keyed by node index;
          nodes without an entry use the default noise source.
        - rng: handed to ``coerce_rng(rng, seed=0)`` before use
          (presumably yields a generator seeded with 0 when ``None`` --
          confirm against ``coerce_rng``).
        """
        generator = coerce_rng(rng, seed=0)
        self.dag = dag
        # A falsy mapping (None or empty) is replaced by a fresh dict.
        self.dict_idiosyncratic = idiosynchratic if idiosynchratic else {}

        default_params = dict_params if dict_params else {}
        self.idiosyncratic = Idiosyncratic(
            class_name=dft_noise,
            rng=generator,
            dict_params=default_params,
        )
        self.edge_model = (
            EdgeModelLinear(self.dag) if edge_model is None else edge_model
        )

    def gen(self, num_samples):
        """
        Generate data from the stored DAG, one node at a time.

        Parameters:
        - num_samples: int, number of samples (rows) to generate.

        Returns:
        - data: np.ndarray of shape (num_samples, num_nodes); the column
          index of a node is the node index itself.
        """
        topo_order = self.dag.topological_sort()
        samples = np.zeros((num_samples, len(topo_order)))

        # Visit nodes in topological order so that parent columns are
        # already filled in when a child is processed.
        for node in topo_order:
            parent_inds = self.dag.get_list_parents_inds(node)

            # A per-node noise source takes precedence over the default.
            noise_source = self.dict_idiosyncratic.get(node, self.idiosyncratic)
            samples[:, node] = noise_source.gen(num_samples)

            if not parent_inds:
                # Root node: the column is pure noise.
                continue
            samples[:, node] += self.edge_model.run(
                node, samples[:, parent_inds]
            )
        return samples