Title: | Learn and Apply Directed Acyclic Graphs for Causal Inference |
---|---|
Description: | Causal Inference Assistance (CIA) for performing causal inference within the structural causal modelling framework. Structure learning is performed using partition Markov chain Monte Carlo (Kuipers & Moffa, 2017) and several additional functions have been added to help with causal inference. Kuipers and Moffa (2017) <doi:10.1080/01621459.2015.1133426>. |
Authors: | Mathew Varidel [aut, cre, cph]
|
Maintainer: | Mathew Varidel <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.1.0 |
Built: | 2025-02-17 05:17:10 UTC |
Source: | https://github.com/spaceodyssey/cia |
Index a cia_chain object
## S3 method for class 'cia_chain' x = list()[i, ...]
## S3 method for class 'cia_chain' x = list()[i, ...]
x |
A cia_chain object. |
i |
An index. |
... |
ellipsis for extra indexing parameters. |
A cia_chain.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[[1]][5]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[[1]][5]
Index a cia_chains object
## S3 method for class 'cia_chains' x = list()[i, ...]
## S3 method for class 'cia_chains' x = list()[i, ...]
x |
A cia_chains object. |
i |
An index to get the cia_chain iterations. |
... |
ellipsis for extra indexing parameters. |
A cia_chains object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[5]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[5]
Indexing with respect to iterations.
## S3 method for class 'cia_post_chain' x = list()[i, ...]
## S3 method for class 'cia_post_chain' x = list()[i, ...]
x |
A cia_post_chain object. |
i |
An index. |
... |
ellipsis for extra indexing parameters. |
A cia_post_chain.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) pedge_sample[5, ]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) pedge_sample[5, ]
Index a cia_post_chains object with respect to iterations.
## S3 method for class 'cia_post_chains' x = list()[i, ...]
## S3 method for class 'cia_post_chains' x = list()[i, ...]
x |
A cia_post_chains object. |
i |
An index to get the cia_post_chain iterations. |
... |
ellipsis for extra indexing parameters. |
A cia_post_chains object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) pedge_sample[5, ]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) pedge_sample[5, ]
Index a cia_chains object
## S3 method for class 'cia_chains' x[[i, ...]]
## S3 method for class 'cia_chains' x[[i, ...]]
x |
A cia_chains object. |
i |
An index to get the cia_chain. |
... |
ellipsis for extra indexing parameters. |
A cia_chain object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[[1]][1:3]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) results[[1]][1:3]
Index a cia_post_chains object.
## S3 method for class 'cia_post_chains' x[[i, ...]]
## S3 method for class 'cia_post_chains' x[[i, ...]]
x |
A cia_post_chains object. |
i |
An index to get the cia_post_chain. |
... |
ellipsis for extra indexing parameters. |
A cia_post_chain object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) head(pedge_sample[[1]])
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains) head(pedge_sample[[1]])
A thin wrapper on the bnlearn::score function.
BNLearnScorer(node, parents, ...)
BNLearnScorer(node, parents, ...)
node |
Name of node to score. |
parents |
The parents of node. |
... |
The ellipsis is used to pass other parameters to the scorer. |
A numeric value representing the log score of the node given the parents.
data <- bnlearn::learning.test BNLearnScorer('A', c('B', 'C'), data = data) BNLearnScorer('A', c(), data = data) BNLearnScorer('A', vector(), data = data) BNLearnScorer('A', NULL, data = data) BNLearnScorer('A', c('B', 'C'), data = data, type = "bde", iss = 100) BNLearnScorer('A', c('B', 'C'), data = data, type = "bde", iss = 1)
data <- bnlearn::learning.test BNLearnScorer('A', c('B', 'C'), data = data) BNLearnScorer('A', c(), data = data) BNLearnScorer('A', vector(), data = data) BNLearnScorer('A', NULL, data = data) BNLearnScorer('A', c('B', 'C'), data = data, type = "bde", iss = 100) BNLearnScorer('A', c('B', 'C'), data = data, type = "bde", iss = 1)
This makes the assumption that the proposal has saved a variable "proposal_used" and mcmc has saved a variable 'accept'.
CalculateAcceptanceRates(chains, group_by = NULL)
CalculateAcceptanceRates(chains, group_by = NULL)
chains |
MCMC chains. |
group_by |
Vector of strings that are in c("chain", "proposal_used"). Default is NULL which will return the acceptance rates marginalised over chains and the proposal used. |
Summary of acceptance rates per grouping.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) CalculateAcceptanceRates(results)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) CalculateAcceptanceRates(results)
Calculate pairwise edge probabilities. The posterior probability of an edge $E$ given the data $D$ is obtained by marginalising out the graph structure $g$ over the graph space $G$, such that
$$p(E|D) = \sum_{g \in G} p(E|g)\,p(g|D).$$
CalculateEdgeProbabilities(x, ...)
CalculateEdgeProbabilities(x, ...)
x |
A cia_chain(s) or collection object where states are DAGs. |
... |
Extra parameters sent to the methods. For a dag collection you can choose to use estimated p(g|D) in two ways which can be specified using the 'method' parameter: method='sampled' for MCMC sampled frequency (which is our recommended method) or method='score' which uses the normalised scores. |
The posterior probability for a given graph p(g|D) is estimated in two ways which can be specified using the 'method' parameter.
Matrix of edge probabilities.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) CalculateEdgeProbabilities(dag_chains)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) CalculateEdgeProbabilities(dag_chains)
Calculate the posterior expected value for a feature ($f(g)$, e.g., existence of an edge in graph $g$) by marginalising out the graph structure $g$ over the graph space $G$, thus
$$E(f|D) = \sum_{g \in G} f(g)\,p(g|D).$$
This can be useful for calculating point estimates of quantities of interests, such as the probability that an edge exists or the probability of one node being an ancestor of another.
CalculateFeatureMean(x, p_feature, ...)
CalculateFeatureMean(x, p_feature, ...)
x |
A chain(s) or collection object. |
p_feature |
A function that takes an adjacency matrix or collection object
and returns a scalar corresponding to $f(g)$. |
... |
Extra parameters sent to the methods. For a dag collection you can choose to use estimated p(g|D) in two ways which can be specified using the 'method' parameter: method='sampled' for MCMC sampled frequency (which is our recommended method) or method='score' which uses the normalised scores. |
A numeric value representing the posterior probability of the feature.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Calculate the mean edge probability per chain. CalculateFeatureMean(dag_chains, function(x) { return(x) }) # Calculate the mean edge probability across chains. CalculateFeatureMean(FlattenChains(dag_chains), function(x) { return(x) })
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Calculate the mean edge probability per chain. CalculateFeatureMean(dag_chains, function(x) { return(x) }) # Calculate the mean edge probability across chains. CalculateFeatureMean(FlattenChains(dag_chains), function(x) { return(x) })
Get the unique set of states along with their log score.
CollectUniqueObjects(x)
CollectUniqueObjects(x)
x |
A cia_chains or cia_chain object. |
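This gets the unique set of states in the cia_chain(s), referred to as objects ($\mathcal{O}$). It then estimates the probability of each state using two methods. The log_sampling_prob is the MCMC sampled frequency estimate of the posterior probability.
An alternative method to estimate the posterior probability of each state uses the state score. This is recorded in log_norm_state_score. This approach estimates the log of the normalisation constant assuming $\tilde{Z}_{\mathcal{O}} = \sum_{s=1}^{|\mathcal{O}|} p(\mathcal{O}_s)\,p(D|\mathcal{O}_s)$, where $\mathcal{O} = \{\mathcal{O}_1, \mathcal{O}_2, \mathcal{O}_3, \ldots\}$ is the set of unique objects in the chain. This assumes that you have captured the most probable objects, such that $\tilde{Z}_{\mathcal{O}}$ is approximately equal to the true evidence $Z = \sum_{g \in G} p(g)\,p(D|g)$, where the sum is across all possible DAGs ($G$). This also makes the assumption that the exponential of the score is proportional to the posterior probability, such that
$$p(g|D) \propto p(g)\,p(D|g) = \prod_i \exp\big(\mathrm{score}(X_i, \mathrm{Pa}_g(X_i) \mid D)\big),$$
where $\mathrm{Pa}_g(X_i)$ is the parents set for node $X_i$ given the graph $g$.
After the normalisation constant has been estimated we then estimate the log probability of each object as
$$\log p(\mathcal{O}_s|D) = \log\big(p(D|\mathcal{O}_s)\,p(\mathcal{O}_s)\big) - \log \tilde{Z}_{\mathcal{O}}.$$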
Preliminary analysis suggests that the sampling frequency approach is more consistent across chains when estimating marginalised edge probabilities, and therefore is our preferred method. However, more work needs to be done here.
A list with entries:
state: List of unique states.
log_evidence_state: Numeric value representing the evidence calculated from the states.
log_state_score: Vector with the log scores for each state.
log_sampling_prob: Vector with the log of the probability for each state estimated using the MCMC sampling frequency.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(100, init_state, PartitionMCMC(), scorer) collection <- CollectUniqueObjects(results)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(100, init_state, PartitionMCMC(), scorer) collection <- CollectUniqueObjects(results)
This is a constructor for a single Coupled Partition MCMC step. The function
constructs an environment with the proposal, inverse temperature, and verbose
flag. It then returns a function that takes the current_state and a scorer
object. This only allows the scores to be raised to a constant temperature
for every step.
CoupledPartitionMCMC( proposal = DefaultProposal(), temperature = c(1, 10, 100, 1000), verbose = TRUE )
CoupledPartitionMCMC( proposal = DefaultProposal(), temperature = c(1, 10, 100, 1000), verbose = TRUE )
proposal |
Proposal function for each chain. The swap proposal is dealt with internally. Default is the DefaultProposal. |
temperature |
Numeric value representing the temperature to raise the score to. Default is c(1.0, 10.0, 100.0, 1000.0). |
verbose |
Flag to pass MCMC information. |
One step implementation of the tempered partition MCMC.
Function that takes the current state and scorer that outputs a new state.
scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) nodes <- names(bnlearn::learning.test) n_coupled_chains <- 8 coupled_state <- InitCoupledPartition(nodes, scorer, n_parallel_chains = 1, n_coupled_chains = n_coupled_chains) coupled_transition <- CoupledPartitionMCMC( proposal = DefaultProposal(p = c(0.0, 1.0, 0.0, 0.0, 0.0)), temperature = 2^(0:(n_coupled_chains - 1)) ) coupled_transition(coupled_state, scorer)
scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) nodes <- names(bnlearn::learning.test) n_coupled_chains <- 8 coupled_state <- InitCoupledPartition(nodes, scorer, n_parallel_chains = 1, n_coupled_chains = n_coupled_chains) coupled_transition <- CoupledPartitionMCMC( proposal = DefaultProposal(p = c(0.0, 1.0, 0.0, 0.0, 0.0)), temperature = 2^(0:(n_coupled_chains - 1)) ) coupled_transition(coupled_state, scorer)
Scorer constructor
CreateScorer( scorer = BNLearnScorer, ..., max_parents = Inf, blacklist = NULL, whitelist = NULL, cache = FALSE, nthreads = 1 )
CreateScorer( scorer = BNLearnScorer, ..., max_parents = Inf, blacklist = NULL, whitelist = NULL, cache = FALSE, nthreads = 1 )
scorer |
A scorer function that takes (node, parents) as parameters. Default is BNLearnScorer. |
... |
Parameters to pass to scorer. |
max_parents |
The maximum number of allowed parents. Default is infinite. |
blacklist |
A boolean matrix of (parent, child) pairs where TRUE represents edges that cannot be in the DAG. Default is NULL which represents no blacklisting. |
whitelist |
A boolean matrix of (parent, child) pairs where TRUE represents edges that must be in the DAG. Default is NULL which represents no whitelisting. |
cache |
A boolean to indicate whether to build the cache. The cache only works for problems where the scorer only varies as a function of (node, parents). Default is FALSE. |
nthreads |
Number of threads used to construct cache. |
A list with entries:
scorer: Function that takes (node, parents) as parameters and returns the score.
parameters: List of extra parameters passed to the scorer.
max_parents: Integer representing the maximum number of possible parents that any child can have.
blacklist: Matrix where each cell represents the (parent, child) pairs that must not be present when equal to 1.
whitelist: Matrix where each cell represents the (parent, child) pairs that must be present when equal to 1.
scorer <- CreateScorer(data = bnlearn::asia)
scorer <- CreateScorer(data = bnlearn::asia)
Converts a directed acyclic graph (DAG) into its equivalence class corresponding to a completed partially directed acyclic graph (CPDAG).
DAGtoCPDAG(x)
DAGtoCPDAG(x)
x |
A matrix, cia_chain, or cia_chains object. When it is a chain(s) object the state must be an adjacency matrix. |
Returns the same object type converted to a CPDAG.
dag <- UniformlySampleDAG(LETTERS[1:3]) DAGtoCPDAG(dag)
dag <- UniformlySampleDAG(LETTERS[1:3]) DAGtoCPDAG(dag)
This converts a DAG to its partition by iteratively constructing sets of outpoints. This is further explained in section 4.1 of Kuipers & Moffa (2017).
DAGtoPartition(dag)
DAGtoPartition(dag)
dag |
A directed acyclic graph represented as an adjacency matrix, igraph, or bnlearn object. |
Labelled partition for the given adjacency matrix.
Kuipers, J., & Moffa, G. (2017). Partition MCMC for inference on acyclic digraphs. Journal of the American Statistical Association, 112(517), 282-299.
dag <- UniformlySampleDAG(LETTERS[1:3]) partitioned_nodes <- DAGtoPartition(dag)
dag <- UniformlySampleDAG(LETTERS[1:3]) partitioned_nodes <- DAGtoPartition(dag)
This constructs a proposal function for PartitionMCMC.
DefaultProposal(p = c(0.33, 0.33, 0.165, 0.165, 0.01), verbose = TRUE)
DefaultProposal(p = c(0.33, 0.33, 0.165, 0.165, 0.01), verbose = TRUE)
p |
Probability for each proposal in the order (split_join, node_move, swap_node, swap_adjacent, stay_still). |
verbose |
Boolean flag to record proposal used. |
A function corresponding to the default proposal.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC( proposal = DefaultProposal(p = c(0.0, 1.0, 0.0, 0.0, 0.0)) ), scorer)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC( proposal = DefaultProposal(p = c(0.0, 1.0, 0.0, 0.0, 0.0)) ), scorer)
Flatten a cia_chains object into a single cia_chain object. This is helpful when you want to calculate a feature using all samples across the cia_chains.
FlattenChains(chains)
FlattenChains(chains)
chains |
A cia_chains object. |
A cia_chain object of flattened samples.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) FlattenChains(results)[1:3]
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) FlattenChains(results)[1:3]
Get an empty DAG given a set of nodes.
GetEmptyDAG(nodes)
GetEmptyDAG(nodes)
nodes |
A vector of node names. |
An adjacency matrix with elements designated as (parent, child).
GetEmptyDAG(LETTERS[1:3])
GetEmptyDAG(LETTERS[1:3])
Get edges that do not improve the score over an empty DAG by more than
a cutoff. In detail, this returns each edge $E$ for which the graph $g_E$
containing only that edge satisfies
Score(g_E) - Score(g_empty) < cutoff. Assuming that the scorer returns the
log of the marginalised posterior, then the cutoff corresponds to the log of
the Bayes Factor. The output can be used as a blacklist.
GetIncrementalScoringEdges(scorer, cutoff = 0)
GetIncrementalScoringEdges(scorer, cutoff = 0)
scorer |
A scorer object. |
cutoff |
A score cutoff. The score cutoff is equal to the log of the Bayes Factor between the two models. |
A Boolean matrix of (parent, child) pairs for blacklisting.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) blacklist <- GetIncrementalScoringEdges(scorer, cutoff = -10.0) blacklist_scorer <- CreateScorer( scorer = BNLearnScorer, data = data, blacklist = blacklist, cache = TRUE ) # Randomly sample a starting DAG consistent with the blacklist. Then # convert to a partition. init_state <- InitPartition(colnames(data), blacklist_scorer) results <- SampleChains(10, init_state, PartitionMCMC(), blacklist_scorer)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) blacklist <- GetIncrementalScoringEdges(scorer, cutoff = -10.0) blacklist_scorer <- CreateScorer( scorer = BNLearnScorer, data = data, blacklist = blacklist, cache = TRUE ) # Randomly sample a starting DAG consistent with the blacklist. Then # convert to a partition. init_state <- InitPartition(colnames(data), blacklist_scorer) results <- SampleChains(10, init_state, PartitionMCMC(), blacklist_scorer)
Get the lowest pairwise scoring edges represented as a blacklist matrix.
This blacklisting procedure is motivated by Koller & Friedman (2003). This
is rarely used now as we found that it blacklists edges that have significant
dependencies but are not in the top edges. We prefer
the GetIncrementalScoringEdges method.
GetLowestPairwiseScoringEdges(scorer, n_retain)
GetLowestPairwiseScoringEdges(scorer, n_retain)
scorer |
A scorer object. |
n_retain |
An integer representing the number of edges to retain. |
A boolean matrix of (parent, child) pairs for blacklisting.
Koller D, Friedman N. Being Bayesian about network structure. A Bayesian approach to structure discovery in Bayesian networks. Mach Learn. 2003;50(1):95–125.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) blacklist <- GetLowestPairwiseScoringEdges(scorer, 3) blacklist_scorer <- CreateScorer( scorer = BNLearnScorer, data = data, blacklist = blacklist, cache = TRUE ) init_state <- InitPartition(colnames(data), blacklist_scorer) results <- SampleChains(10, init_state, PartitionMCMC(), blacklist_scorer)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) blacklist <- GetLowestPairwiseScoringEdges(scorer, 3) blacklist_scorer <- CreateScorer( scorer = BNLearnScorer, data = data, blacklist = blacklist, cache = TRUE ) init_state <- InitPartition(colnames(data), blacklist_scorer) results <- SampleChains(10, init_state, PartitionMCMC(), blacklist_scorer)
Get the maximum a posteriori state
GetMAP(x)
GetMAP(x)
x |
A collection of unique objects or chains object. |
A list with the adjacency matrix for the MAP and its posterior probability. It is possible for it to return multiple DAGs. The list has elements:
state: List of MAP DAGs.
log_p: Numeric vector with the log posterior probability for each state.
log_state_score: Numeric vector representing the log score for each state.
log_norm_state_score: Numeric vector representing the log of the normalised score for each state.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) # Get the MAP per chain. Can be helpful to compare chains. GetMAP(results) # Get MAP across all chains. results |> FlattenChains() |> GetMAP()
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) # Get the MAP per chain. Can be helpful to compare chains. GetMAP(results) # Get MAP across all chains. results |> FlattenChains() |> GetMAP()
Initialise partition state for SampleChains.
InitCoupledPartition( nodes, scorer, init_state = NULL, n_coupled_chains = 4, n_parallel_chains = 2 )
InitCoupledPartition( nodes, scorer, init_state = NULL, n_coupled_chains = 4, n_parallel_chains = 2 )
nodes |
A character vector of node names. |
scorer |
A scorer object. |
init_state |
Coupled partition state. Default is NULL. |
n_coupled_chains |
Number of coupled chains. Default is 4. |
n_parallel_chains |
Number of parallel chains to be run in SampleChains. Default is 2. |
scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) nodes <- names(bnlearn::learning.test) InitCoupledPartition(nodes, scorer)
scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) nodes <- names(bnlearn::learning.test) InitCoupledPartition(nodes, scorer)
Initialise states for SampleChains. Initialise partition state for SampleChains.
InitPartition(nodes, scorer, init_state = NULL, n_parallel_chains = 2)
InitPartition(nodes, scorer, init_state = NULL, n_parallel_chains = 2)
nodes |
A character vector of node names. |
scorer |
A scorer object. |
init_state |
A data.frame representing a partition. Default is NULL. |
n_parallel_chains |
Number of parallel chains to be run in SampleChains. Default is 2. |
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) InitPartition(colnames(data), scorer)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) InitPartition(colnames(data), scorer)
Mutilate a graph in accordance with an intervention. This is typically used to perform a do-operation on a given graph. Please note that any evidence set within the original grain object will not be passed to the new object.
MutilateGraph(grain_object, intervention)
MutilateGraph(grain_object, intervention)
grain_object |
A grain object. |
intervention |
A list of nodes and their corresponding intervention distribution represented as a vector of unconditional probabilities. |
A grain object.
# This creates a mutilated graph in accordance with turning the sprinkler # on in the wet grass example (i.e, do(S = 'yes')). yn <- c("yes", "no") p.R <- gRain::cptable(~R, values=c(.2, .8), levels=yn) p.S_R <- gRain::cptable(~S:R, values=c(.01, .99, .4, .6), levels=yn) p.G_SR <- gRain::cptable(~G:S:R, values=c(.99, .01, .8, .2, .9, .1, 0, 1), levels=yn) wet.cpt <- gRain::grain(gRain::compileCPT(p.R, p.S_R, p.G_SR)) mut_graph <- MutilateGraph(wet.cpt, list(S = c(1.0, 0.0))) # You can then use querygrain to perform an intervention query. For example, # p(G | do(S = 'yes')) is given by, gRain::querygrain(mut_graph, 'G') # You can also perform an observational query for a node not affected # by the intervention. For example, p(R | do(S = 'yes')) is given by, gRain::querygrain(mut_graph, 'R')
# This creates a mutilated graph in accordance with turning the sprinkler # on in the wet grass example (i.e, do(S = 'yes')). yn <- c("yes", "no") p.R <- gRain::cptable(~R, values=c(.2, .8), levels=yn) p.S_R <- gRain::cptable(~S:R, values=c(.01, .99, .4, .6), levels=yn) p.G_SR <- gRain::cptable(~G:S:R, values=c(.99, .01, .8, .2, .9, .1, 0, 1), levels=yn) wet.cpt <- gRain::grain(gRain::compileCPT(p.R, p.S_R, p.G_SR)) mut_graph <- MutilateGraph(wet.cpt, list(S = c(1.0, 0.0))) # You can then use querygrain to perform an intervention query. For example, # p(G | do(S = 'yes')) is given by, gRain::querygrain(mut_graph, 'G') # You can also perform an observational query for a node not affected # by the intervention. For example, p(R | do(S = 'yes')) is given by, gRain::querygrain(mut_graph, 'R')
This is a constructor for a single Tempered Partition MCMC step. The function constructs an environment with the proposal, inverse temperature, and verbose flag. It then returns a function that takes the current_state and a scorer object. This only allows the scores to be raised to a constant temperature for every step.
PartitionMCMC( proposal = DefaultProposal(), temperature = 1, prerejection = FALSE, verbose = TRUE )
PartitionMCMC( proposal = DefaultProposal(), temperature = 1, prerejection = FALSE, verbose = TRUE )
One step implementation of the tempered partition MCMC.
Function that takes the current state and scorer that outputs a new state.
dag <- UniformlySampleDAG(c('A', 'B', 'C', 'D', 'E', 'F')) partitioned_nodes <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) current_state <- list( state = partitioned_nodes, log_score = ScoreLabelledPartition(partitioned_nodes, scorer) ) pmcmc <- PartitionMCMC(proposal = DefaultProposal(), temperature = 1.0) pmcmc(current_state, scorer)
dag <- UniformlySampleDAG(c('A', 'B', 'C', 'D', 'E', 'F')) partitioned_nodes <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = bnlearn::learning.test ) current_state <- list( state = partitioned_nodes, log_score = ScoreLabelledPartition(partitioned_nodes, scorer) ) pmcmc <- PartitionMCMC(proposal = DefaultProposal(), temperature = 1.0) pmcmc(current_state, scorer)
Samples a DAG in accordance with its posterior probability conditional on it being consistent with a partition.
PartitiontoDAG(partitions, scorer)
PartitiontoDAG(partitions, scorer)
partitions |
A cia_chain(s) object or data.frame representing the partition. |
scorer |
A scorer object. |
A cia_chain(s) object or adjacency matrix. For a cia_chain(s) object each state will be an adjacency matrix.
data <- bnlearn::learning.test dag <- UniformlySampleDAG(colnames(data)) partition <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) # Used to sample from a single partition. PartitiontoDAG(partition, scorer) # Used to convert a chain of partitions to DAGs. init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(3, init_state, PartitionMCMC(), scorer) PartitiontoDAG(results, scorer)
data <- bnlearn::learning.test dag <- UniformlySampleDAG(colnames(data)) partition <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) # Used to sample from a single partition. PartitiontoDAG(partition, scorer) # Used to convert a chain of partitions to DAGs. init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(3, init_state, PartitionMCMC(), scorer) PartitiontoDAG(results, scorer)
Plot a concordance plot to compare point-estimates for quantities of interest between chains.
PlotConcordance(x, ...)
PlotConcordance(x, ...)
x |
A list of adjacency matrices representing edge probabilities, a chains object, or a collections object with states as DAGs. |
... |
Additional parameters to send to the appropriate method. This includes 'highlight' (defaults to 0.3), which sets the cutoff difference that is used to highlight the points, and the edge probability estimation 'method' for a cia_collections object. |
A ggplot object or patchwork of ggplot objects.
data <- bnlearn::learning.test scorer <- CreateScorer(scorer = BNLearnScorer, data = data) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer, n_parallel_chains = 2) dags <- PartitiontoDAG(results, scorer) p_edge <- CalculateEdgeProbabilities(dags) PlotConcordance(p_edge)
data <- bnlearn::learning.test scorer <- CreateScorer(scorer = BNLearnScorer, data = data) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer, n_parallel_chains = 2) dags <- PartitiontoDAG(results, scorer) p_edge <- CalculateEdgeProbabilities(dags) PlotConcordance(p_edge)
Plot cumulative mean trace plot.
PlotCumulativeMeanTrace( x, ncol = NULL, nrow = NULL, scales = "fixed", dir = "v" )
PlotCumulativeMeanTrace( x, ncol = NULL, nrow = NULL, scales = "fixed", dir = "v" )
x |
A posterior predictive sample object. |
ncol |
Number of columns. |
nrow |
Number of rows. |
scales |
Whether the scales should be fixed ('fixed', the default), free ('free'), or free in one dimension ('free_x', 'free_y'). |
dir |
Direction to fill facets. Either 'h' for horizontal or 'v' for vertical. |
A ggplot object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Sample the edge probability. p_edge <- function(dag) { return(as.vector(dag)) } pedge_sample <- SamplePosteriorPredictiveChains(dag_chains, p_edge) PlotCumulativeMeanTrace(pedge_sample, nrow = length(data), ncol = length(data))
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Sample the edge probability. p_edge <- function(dag) { return(as.vector(dag)) } pedge_sample <- SamplePosteriorPredictiveChains(dag_chains, p_edge) PlotCumulativeMeanTrace(pedge_sample, nrow = length(data), ncol = length(data))
Plot the score trace
PlotScoreTrace( chains, attribute = "log_score", n_burnin = 0, same_plot = TRUE, col = NULL )
PlotScoreTrace( chains, attribute = "log_score", n_burnin = 0, same_plot = TRUE, col = NULL )
chains |
MCMC chains. |
attribute |
Name of attribute to plot. Default is "log_score". |
n_burnin |
Number of steps to remove as burnin. |
same_plot |
Whether to plot on the same figure or on multiple figures. |
col |
A string representing a color for a single chain or a vector of strings to cycle through for multiple chains. |
Depending on the argument 'same_plot', either:
A single 'ggplot' object combining all chains into one plot
A list of 'ggplot' objects, each corresponding to a separate chain
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) # Plot partition score trace. PlotScoreTrace(results) # Plot DAG score trace. dag_chains <- PartitiontoDAG(results, scorer) PlotScoreTrace(dag_chains)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) # Plot partition score trace. PlotScoreTrace(results) # Plot DAG score trace. dag_chains <- PartitiontoDAG(results, scorer) PlotScoreTrace(dag_chains)
This allows you to remove a burnin and thin the chains after processing. This is mostly redundant as you can now index the cia_chain(s) objects directly.
PostProcessChains(chains, n_burnin = 0, n_thin = 1)
PostProcessChains(chains, n_burnin = 0, n_thin = 1)
chains |
cia_chain(s) object. |
n_burnin |
Number of steps to remove at the start as a burnin. Default is 0. |
n_thin |
Number of steps between retained states. Default is 1. |
A cia_chain(s) object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(100, init_state, PartitionMCMC(), scorer) thinned_results <- PostProcessChains(results, n_thin = 2)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(100, init_state, PartitionMCMC(), scorer) thinned_results <- PostProcessChains(results, n_thin = 2)
Sample chains
SampleChains( n_results, init_state, transition, scorer, n_thin = 1, n_parallel_chains = 2 )
SampleChains( n_results, init_state, transition, scorer, n_thin = 1, n_parallel_chains = 2 )
n_results |
Number of saved states per chain. |
init_state |
An initial state that can be passed to transition. This can be a single state or a list of states for each parallel chain. |
transition |
A transition function. |
scorer |
A scorer object. |
n_thin |
Number of steps between saved states. |
n_parallel_chains |
Number of chains to run in parallel. Default is 2. |
A cia_chains object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer)
Sample edge probabilities
SampleEdgeProbabilities(x)
SampleEdgeProbabilities(x)
x |
A chain(s) or collection object where states are DAGs. |
p_edge A posterior sample for the marginalised edge probabilities.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains)
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) pedge_sample <- SampleEdgeProbabilities(dag_chains)
Simulate samples from a posterior predictive distribution for a feature $f(g)$ of a graph $g$.
SamplePosteriorPredictiveChains(x, p_predict, ...)
SamplePosteriorPredictiveChains(x, p_predict, ...)
x |
A cia_chain(s) object. |
p_predict |
A function that draws from the posterior predictive distribution of interest given an adjacency matrix representing a DAG. The function must be of the form p_predict(dag, ...) and return a vector of numeric values. |
... |
Parameters to be passed to p_predict. |
A cia_post_chain(s) object.
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Sample the edge probability. SamplePosteriorPredictiveChains(dag_chains, function(dag) { return(dag) })
data <- bnlearn::learning.test scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) init_state <- InitPartition(colnames(data), scorer) results <- SampleChains(10, init_state, PartitionMCMC(), scorer) dag_chains <- PartitiontoDAG(results, scorer) # Sample the edge probability. SamplePosteriorPredictiveChains(dag_chains, function(dag) { return(dag) })
Score DAG.
ScoreDAG(dag, scorer)
ScoreDAG(dag, scorer)
dag |
Adjacency matrix of (parent, child) entries with 1 denoting an edge and 0 otherwise. |
scorer |
Scorer object. |
Log of DAG score.
dag <- UniformlySampleDAG(names(bnlearn::asia)) scorer <- CreateScorer(data = bnlearn::asia) ScoreDAG(dag, scorer)
dag <- UniformlySampleDAG(names(bnlearn::asia)) scorer <- CreateScorer(data = bnlearn::asia) ScoreDAG(dag, scorer)
Score labelled partition
ScoreLabelledPartition(partitioned_nodes, scorer)
ScoreLabelledPartition(partitioned_nodes, scorer)
partitioned_nodes |
Labelled partition. |
scorer |
Scorer object. |
Log of the node score.
data <- bnlearn::learning.test dag <- UniformlySampleDAG(names(data)) partitioned_nodes <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) ScoreLabelledPartition(partitioned_nodes, scorer)
data <- bnlearn::learning.test dag <- UniformlySampleDAG(names(data)) partitioned_nodes <- DAGtoPartition(dag) scorer <- CreateScorer( scorer = BNLearnScorer, data = data ) ScoreLabelledPartition(partitioned_nodes, scorer)
Convert to bnlearn object.
toBNLearn(x)
toBNLearn(x)
x |
An object that represents a DAG. |
bn_obj A bn object.
adj <- UniformlySampleDAG(c('A', 'B', 'C')) toBNLearn(adj)
adj <- UniformlySampleDAG(c('A', 'B', 'C')) toBNLearn(adj)
Convert to a gRain object.
togRain(x, ...)
togRain(x, ...)
x |
An adjacency matrix or igraph object. |
... |
extra parameters to gRain compile. |
A gRain object.
dag <- bnlearn::model2network("[A][C][F][B|A][D|A:C][E|B:F]") gRain_obj <- togRain(x = dag |> toMatrix(), data = bnlearn::learning.test)
dag <- bnlearn::model2network("[A][C][F][B|A][D|A:C][E|B:F]") gRain_obj <- togRain(x = dag |> toMatrix(), data = bnlearn::learning.test)
Convert a DAG object from other libraries to an adjacency matrix.
toMatrix(network)
toMatrix(network)
network |
A bnlearn or igraph object. |
An adjacency matrix representation of network.
toMatrix(bnlearn::empty.graph(LETTERS[1:6])) toMatrix(igraph::sample_k_regular(10, 2))
toMatrix(bnlearn::empty.graph(LETTERS[1:6])) toMatrix(igraph::sample_k_regular(10, 2))
Uniformly sample DAG
UniformlySampleDAG(nodes)
UniformlySampleDAG(nodes)
nodes |
A vector of node names. |
Adjacency matrix with elements designated as (parent, child).
UniformlySampleDAG(LETTERS[1:3])
UniformlySampleDAG(LETTERS[1:3])