Skip to article frontmatterSkip to article content

Exercise 2: Implementing a simple SBM state and MCMC

Authors: Tiago P. Peixoto1
Affiliations: 1Inverse Complexity Lab
License: CC-BY

The idea of this exercise is to implement the inference of a simple planted partition SBM with a likelihood given by

P(Aλin,λout,b)=ijeλijλijAijAij!,P(A|\lambda_{\text{in}},\lambda_{\text{out}}, \bm b) = \prod_{ij} \frac{\mathrm{e}^{-\lambda_{ij}}\lambda_{ij}^{A_{ij}}}{A_{ij}!},

with

λij=λinδbi,bj+λout(1δbi,bj).\lambda_{ij} = \lambda_{\text{in}}\delta_{b_i,b_j} + \lambda_{\text{out}}(1-\delta_{b_i,b_j}).

The templates below (available from the manual repo; no need to copy and paste.) can be used to make use of the functionalities already available in graph-tool.

The first file defines a C++ state object that receives a series of parameters.

example.hh
#ifndef EXAMPLE_HH
#define EXAMPLE_HH

#include "config.h"

#include <vector>

#include "graph_tool.hh"
#include "random.hh"
#include "inference/support/util.hh"
#include "inference/support/graph_state.hh"

namespace graph_tool
{
using namespace boost;
using namespace std;

// parameters to the description length
struct eargs_t
{
    bool param1 = false;
    double param2 = 42;
};

typedef int64_t group_t;
typedef vprop_map_t<group_t> vmap_t;

#define EXAMPLE_STATE_params                                                   \
   ((g, &, decltype(all_graph_views), 1))                                      \
   ((b,, vmap_t, 0))

GEN_STATE_BASE(ExampleStateBase, EXAMPLE_STATE_params)

template <class... Ts>
class ExampleState
    : public ExampleStateBase<Ts...>
{
public:
    GET_PARAMS_USING(ExampleStateBase<Ts...>, EXAMPLE_STATE_params)
    GET_PARAMS_TYPEDEF(Ts, EXAMPLE_STATE_params)
    using typename ExampleStateBase<Ts...>::args_t;
    using ExampleStateBase<Ts...>::dispatch_args;

    template <class... ATs,
              typename std::enable_if_t<sizeof...(ATs) == sizeof...(Ts)>* = nullptr>
    ExampleState(ATs&&... args)
        : ExampleStateBase<Ts...>(std::forward<ATs>(args)...)
    {
        // the paramters here are accessible as members with underscores, e.g.
        // _g, _b, etc.
    }

    // Computes the DL difference of moving vertex from group r to nr
    double virtual_move(size_t v, group_t r, group_t nr, eargs_t& ea)
    {
        return 0;
    }

    // Moves vertex v to group s
    void move_vertex(size_t v, group_t s)
    {
        return;
    }

    // Sample a new group membership for vertex v
    group_t sample_group(size_t v, rng_t& rng)
    {
        return 0;
    }

    // Calculate the log-probability of moving vertex v from group r to s (or
    // the reverse probability if reverse == true)
    double get_move_lprob(size_t v, group_t r, group_t s,
                          bool reverse)
    {
        // warning: if reverse == true, the probability returned should match
        // the state which is identical to the current one *except* that the
        // membership of vertex v should be considered to be equal to s, and the
        // proposal would move it to r.
        return 0;
    }

    double entropy(const eargs_t& ea)
    {
        return 0;
    }
};
} // graph_tool namespace

#endif //EXAMPLE_HH

The second file is a compilation unit that takes care of instantiating the state above when this is requested from Python. This is just boilerplate, and does not need to be modified.

example.cc
#include <boost/python.hpp>

#include "example.hh"

using namespace boost;
using namespace graph_tool;

GEN_DISPATCH(example_state, ExampleState, EXAMPLE_STATE_params)

python::object make_example_state(boost::python::object ostate)
{
    python::object state;
    example_state::make_dispatch(ostate,
                                  [&](auto& s){state = python::object(s);});
    return state;
}

#define __MOD__ example
#define DEF_REGISTRY
#include "module_registry.hh"

BOOST_PYTHON_MODULE(libexample)
{
    using namespace boost::python;
    def("make_example_state", &make_example_state);

    example_state::dispatch
        ([&](auto* s)
         {
             typedef typename std::remove_reference<decltype(*s)>::type state_t;
             class_<state_t, bases<>, std::shared_ptr<state_t>>
                 c(name_demangle(typeid(state_t).name()).c_str(),
                   no_init);
             c.def("virtual_move", &state_t::virtual_move)
                 .def("move_vertex", &state_t::move_vertex)
                 .def("entropy", &state_t::entropy);
         });

    class_<eargs_t>("eargs")
        .def_readwrite("param1", &eargs_t::param1)
        .def_readwrite("param2", &eargs_t::param2);

    __MOD__::EvokeRegistry();
}

The MCMC itself requires us to implement the following template, which is relatively straightforward:

example_mcmc.hh
#ifndef EXAMPLE_MCMC_HH
#define EXAMPLE_MCMC_HH

#include "config.h"

#include <vector>

#include "graph_tool.hh"
#include "random.hh"
#include "inference/support/graph_state.hh"
#include "inference/loops/mcmc_loop.hh"

namespace graph_tool
{
using namespace boost;
using namespace std;

#define MCMC_EXAMPLE_STATE_params(State)                                       \
    ((__class__,&, decltype(hana::tuple_t<python::object>), 1))                \
    ((state, &, State&, 0))                                                    \
    ((beta,, double, 0))                                                       \
    ((entropy_args,, eargs_t, 0))                                              \
    ((sequential,, bool, 0))                                                   \
    ((deterministic,, bool, 0))                                                \
    ((verbose,, int, 0))                                                       \
    ((niter,, size_t, 0))


template <class State>
struct MCMC
{
    GEN_STATE_BASE(MCMCExampleStateBase, MCMC_EXAMPLE_STATE_params(State))

    template <class... Ts>
    class MCMCExampleState
        : public MCMCExampleStateBase<Ts...>,
          public MetropolisStateBase
    {
    public:
        GET_PARAMS_USING(MCMCExampleStateBase<Ts...>,
                         MCMC_EXAMPLE_STATE_params(State))
        GET_PARAMS_TYPEDEF(Ts, MCMC_EXAMPLE_STATE_params(State))

        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCExampleState(ATs&&... as)
           : MCMCExampleStateBase<Ts...>(as...),
            _vlist(num_vertices(_state._g))
        {
            std::iota(_vlist.begin(), _vlist.end(), 0);
        }

        std::vector<size_t> _vlist;
        constexpr static group_t _null_move = std::numeric_limits<group_t>::max();

        auto node_state(size_t v)
        {
            return _state._b[v];
        }

        template <class RNG>
        auto move_proposal(size_t v, RNG& rng)
        {
            auto s = _state.sample_group(v, rng);
            if (s == node_state(v))
                return _null_move;
            return s;
        }

        std::tuple<double, double>
        virtual_move_dS(size_t v, size_t s)
        {
            size_t r =  node_state(v);
            if (r == s)
                return {0., 0.};

            double dS = _state.virtual_move(v, r, s, _entropy_args);
            double a = 0;
            if (!std::isinf(_beta))
            {
                double pf = _state.get_move_lprob(v, r, s, false);
                double pb = _state.get_move_lprob(v, s, r, true);
                a = pb - pf;
            }
            return {dS, a};
        }

        void perform_move(size_t v, group_t s)
        {
            _state.move_vertex(v, s);
        }

        bool is_deterministic()
        {
            return _deterministic;
        }

        bool is_sequential()
        {
            return _sequential;
        }

        auto& get_vlist()
        {
            return _vlist;
        }

        double get_beta()
        {
            return _beta;
        }

        size_t get_niter()
        {
            return _niter;
        }
    };
};


} // graph_tool namespace

#endif //EXAMPLE_MCMC_HH

As well as its compilation unit:

example_mcmc.c
#include "graph_tool.hh"
#include "random.hh"

#include <boost/python.hpp>

#include "example.hh"
#include "example_mcmc.hh"

using namespace boost;
using namespace graph_tool;

GEN_DISPATCH(example_state, ExampleState, EXAMPLE_STATE_params)

template <class State>
GEN_DISPATCH(mcmc_example_state, MCMC<State>::template MCMCExampleState,
             MCMC_EXAMPLE_STATE_params(State))

python::object example_mcmc_sweep(python::object omcmc_state,
                                  python::object oexample_state,
                                  rng_t& rng)
{
    python::object ret;
    example_state::dispatch
        (oexample_state,
         [&](auto& example_state)
         {
             typedef typename std::remove_reference<decltype(example_state)>::type
                 state_t;

             mcmc_example_state<state_t>::make_dispatch
                 (omcmc_state,
                  [&](auto& s)
                  {
                      auto ret_ = mcmc_sweep(*s, rng);
                      ret = tuple_apply([&](auto&... args){ return python::make_tuple(args...); }, ret_);
                  });
         });
    return ret;
}

#define __MOD__ example
#include "module_registry.hh"
REGISTER_MOD
([]
{
    using namespace boost::python;
    def("example_mcmc_sweep", &example_mcmc_sweep);
});

Finally, we need a a Python file that binds everything from the C++ side.

example.py
import graph_tool
from graph_tool import Graph, GraphView, _get_rng, _prop, PropertyMap
from graph_tool.inference.base_states import EntropyState, MCMCState, \
    MultiflipMCMCState, MultilevelMCMCState, DrawBlockState, copy_state_wrap, \
    entropy_state_signature
from graph_tool.inference.util import *
import numpy as np

import libexample

@entropy_state_signature
class ExampleState(MCMCState, MultiflipMCMCState, MultilevelMCMCState,
                   DrawBlockState):
    r"""FIXME: write the documentation"""

    def __init__(self, g, b=None, entropy_args={}, **kwargs):
        EntropyState.__init__(self, entropy_args=entropy_args)
        self.g = g
        if b is None:
            b = g.new_vp("int64_t")
        self.b = b
        self._state = libexample.make_example_state(self)

    def __copy__(self):
        return self.copy()

    def copy(self, **kwargs):
        r"""Copies the state. The parameters override the state properties, and
         have the same meaning as in the constructor."""
        args = self.__getstate__()
        args.update(**kwargs)
        return ExampleState(**args)

    def __getstate__(self):
        state = EntropyState.__getstate__(self)
        return dict(state, g=self.g, b=self.b)

    def __setstate__(self, state):
        self.__init__(**state)

    @copy_state_wrap
    def _entropy(self, param1=False, param2=42., **kwargs):
        r"""Return the description length (negative joint log-likelihood)."""

        eargs = self._get_entropy_args(locals())

        S = self._state.entropy(eargs)

        if len(kwargs) > 0:
            raise ValueError("unrecognized keyword arguments: " +
                             str(list(kwargs.keys())))
        return S

    def _gen_eargs(self, args):
        return libexample.eargs()

    def _mcmc_sweep_dispatch(self, mcmc_state):
        return libexample.example_mcmc_sweep(mcmc_state, self._state,
                                             _get_rng())

    def _multiflip_mcmc_sweep_dispatch(self, mcmc_state):
        return libexample.example_multiflip_mcmc_sweep(mcmc_state,
                                                       self._state,
                                                       _get_rng())

    def _multilevel_mcmc_sweep_dispatch(self, mcmc_state):
        return libexample.example_multilevel_mcmc_sweep(mcmc_state,
                                                        self._state,
                                                        _get_rng())

The Makefile below facilitates the compilation:

Makefile
CXX=g++
CXXFLAGS=-O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG
LDFLAGS=`pkg-config --libs graph-tool-py` -shared

SOURCES=$(shell echo *.cc)
HEADERS=$(shell echo *.hh)
OBJECTS=$(SOURCES:.cc=.o)

TARGET=libexample.so

all: $(TARGET)

clean:
	rm -f $(OBJECTS) $(TARGET)

$(OBJECTS): %.o: %.cc %.hh example.hh
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(TARGET) : $(OBJECTS)
	$(CXX) $(CXXFLAGS) $(OBJECTS) -o $@ $(LDFLAGS)

We need to compile the C++ code by typing make:

$ make
g++ -O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG -c example.cc -o example.o
g++ -O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG -c example_mcmc.cc -o example_mcmc.o
g++ -O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG -c example_multiflip_mcmc.cc -o example_multiflip_mcmc.o
g++ -O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG -c example_multilevel_mcmc.cc -o example_multilevel_mcmc.o
g++ -O3 -fopenmp -std=gnu++17 -Wall -fPIC -DPIC `pkg-config --cflags graph-tool-py` -DBOOST_ALLOW_DEPRECATED_HEADERS -DNDEBUG example.o example_mcmc.o example_multiflip_mcmc.o example_multilevel_mcmc.o -o libexample.so `pkg-config --libs graph-tool-py` -shared

Now we are ready to call the function above from Python:

from graph_tool.all import *
from example import ExampleState

g = collection.data["dolphins"]

state = ExampleState(g)
state.mcmc_sweep()
(0.0, 0, 0)

Task 1: Modify the code above to implement the SBM of Eq (1).

Task 2a: Modify the code below to implement a merge-split MCMC

example_multiflip_mcmc.hh
#ifndef EXAMPLE_MULTIFLIP_MCMC_HH
#define EXAMPLE_MULTIFLIP_MCMC_HH

#include "config.h"

#include <vector>
#include <algorithm>

#include "graph_tool.hh"
#include "inference/support/graph_state.hh"
#include "inference/support/util.hh"

#include "example.hh"

#include "idx_map.hh"
#include "inference/loops/merge_split.hh"

namespace graph_tool
{
using namespace boost;
using namespace std;

#define MCMC_EXAMPLE_STATE_params(State)                                       \
    ((__class__,&, decltype(hana::tuple_t<python::object>), 1))                \
    ((state, &, State&, 0))                                                    \
    ((beta,, double, 0))                                                       \
    ((psingle,, double, 0))                                                    \
    ((psplit,, double, 0))                                                     \
    ((pmerge,, double, 0))                                                     \
    ((pmergesplit,, double, 0))                                                \
    ((pmovelabel,, double, 0))                                                 \
    ((nproposal, &, vector<size_t>&, 0))                                       \
    ((nacceptance, &, vector<size_t>&, 0))                                     \
    ((gibbs_sweeps,, size_t, 0))                                               \
    ((entropy_args,, eargs_t, 0))                                              \
    ((verbose,, int, 0))                                                       \
    ((force_move,, bool, 0))                                                   \
    ((niter,, double, 0))

template <class State>
struct MCMC
{
    GEN_STATE_BASE(MCMCExampleStateBase, MCMC_EXAMPLE_STATE_params(State))

    template <class... Ts>
    class MCMCExampleStateImp
        : public MCMCExampleStateBase<Ts...>,
          public MergeSplitStateBase
    {
    public:
        GET_PARAMS_USING(MCMCExampleStateBase<Ts...>,
                         MCMC_EXAMPLE_STATE_params(State))
        GET_PARAMS_TYPEDEF(Ts, MCMC_EXAMPLE_STATE_params(State))

        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCExampleStateImp(ATs&&... as)
           : MCMCExampleStateBase<Ts...>(as...)
        {
        }

        constexpr static group_t _null_group = std::numeric_limits<group_t>::max();

        constexpr static double _psrandom = 1;
        constexpr static double _psscatter = 1;
        constexpr static double _pscoalesce = 1;

        template <class F>
        void iter_nodes(F&& f)
        {
            for (auto v : vertices_range(_state._g))
                f(v);
        }

        template <class F>
        void iter_groups(F&& f)
        {
            // FIXME: This should iterate overa all groups and call the function
            // f on each of them
        }

        auto get_group(size_t v)
        {
            return _state._b[v];
        }

        template <bool sample_branch=true, class RNG, class VS = std::array<group_t,0>>
        group_t sample_new_group(size_t, RNG& rng, VS&& except = VS())
        {
            // FIXME: This should return an empty group, with a value not
            // contained in 'except'.
            return 0;
        }

        void move_node(size_t v, group_t r, bool)
        {
            _state.move_vertex(v, r);
        }

        double virtual_move(size_t v, group_t r, group_t s)
        {
            return _state.virtual_move(v, r, s, _entropy_args);
        }

        template <class RNG>
        auto sample_group(size_t v, bool, RNG& rng)
        {
            return _state.sample_group(v, rng);
        }

        double get_move_prob(size_t v, group_t r, group_t s, bool, bool reverse)
        {
            return _state.get_move_lprob(v, r, s, reverse);
        }
    };

    using gmap_t = idx_map<size_t, idx_set<size_t, true>, false, true, true>;

    template <class T>
    using iset = idx_set<T>;

    template <class T, class V>
    using imap = idx_map<T, V>;

    template <class... Ts>
    class MCMCExampleState:
        public MergeSplit<MCMCExampleStateImp<Ts...>,
                          size_t,
                          group_t,
                          iset,
                          imap,
                          iset,
                          gmap_t, false, true>
    {
    public:
        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCExampleState(ATs&&... as)
           : MergeSplit<MCMCExampleStateImp<Ts...>,
                        size_t,
                        group_t,
                        iset,
                        imap,
                        iset,
                        gmap_t, false, true>(as...)
        {}
    };
};

} // graph_tool namespace

#endif //EXAMPLE_MULTIFLIP_MCMC_HH

Task 2b: Modify the code below to implement an agglomerative heuristic

example_multilevel_mcmc.hh
#ifndef EXAMPLE_MULTILEVEL_MCMC_HH
#define EXAMPLE_MULTILEVEL_MCMC_HH

#include "config.h"

#include <vector>
#include <algorithm>

#include "graph_tool.hh"
#include "inference/support/graph_state.hh"

#include "example.hh"

#include "idx_map.hh"
#include "inference/loops/multilevel.hh"

namespace graph_tool
{
using namespace boost;
using namespace std;

typedef vprop_map_t<int64_t> vmap_t;

#define MCMC_EXAMPLE_STATE_params(State)                                       \
    ((__class__,&, decltype(hana::tuple_t<python::object>), 1))                \
    ((state, &, State&, 0))                                                    \
    ((beta,, double, 0))                                                       \
    ((r,, double, 0))                                                          \
    ((random_bisect,, bool, 0))                                                \
    ((merge_sweeps,, size_t, 0))                                               \
    ((mh_sweeps,, size_t, 0))                                                  \
    ((parallel,, bool, 0))                                                     \
    ((init_min_iter,, size_t, 0))                                              \
    ((init_r,, double, 0))                                                     \
    ((init_beta,, double, 0))                                                  \
    ((gibbs,, bool, 0))                                                        \
    ((M,, size_t, 0))                                                          \
    ((global_moves,, bool, 0))                                                 \
    ((cache_states,, bool, 0))                                                 \
    ((B_min,, size_t, 0))                                                      \
    ((B_max,, size_t, 0))                                                      \
    ((b_min,, vmap_t, 0))                                                      \
    ((b_max,, vmap_t, 0))                                                      \
    ((force_accept,, bool, 0))                                                 \
    ((entropy_args,, eargs_t, 0))                                              \
    ((verbose,, int, 0))                                                       \
    ((niter,, size_t, 0))

template <class State>
struct MCMC
{
    GEN_STATE_BASE(MCMCExampleStateBase, MCMC_EXAMPLE_STATE_params(State))

    template <class... Ts>
    class MCMCExampleStateImp
        : public MCMCExampleStateBase<Ts...>,
          public MultivelStateBase
    {
    public:
        GET_PARAMS_USING(MCMCExampleStateBase<Ts...>,
                         MCMC_EXAMPLE_STATE_params(State))
        GET_PARAMS_TYPEDEF(Ts, MCMC_EXAMPLE_STATE_params(State))

        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCExampleStateImp(ATs&&... as)
        : MCMCExampleStateBase<Ts...>(as...)
        {
            if (_global_moves)
            {
                idx_set<group_t> rs_min, rs_max;
                for (auto v : vertices_range(_state._g))
                {
                    rs_min.insert(_b_min[v]);
                    rs_max.insert(_b_max[v]);
                }
                _has_b_min = rs_min.size() == _B_min;
                _has_b_max = rs_max.size() == _B_max;
            }
        }

        bool _has_b_max = false;
        bool _has_b_min = false;

        idx_set<group_t> _rs;

        constexpr static group_t _null_group = std::numeric_limits<group_t>::max();

        template <class F>
        void iter_nodes(F&& f)
        {
            for (auto v : vertices_range(_state._g))
                f(v);
        }

        template <class F>
        void iter_groups(F&& f)
        {
            // FIXME: This should iterate overa all groups and call the function
            // f on each of them
        }

        auto get_group(size_t v)
        {
            return _state._b[v];
        }

        template <class RNG>
        group_t get_new_group(size_t v, bool, RNG& rng)
        {
            // FIXME: This should return a new, empty group.
            return 0;
        }

        void move_node(size_t v, group_t r, bool)
        {
            _state.move_vertex(v, r);
        }

        double virtual_move(size_t v, group_t r, group_t s)
        {
            return _state.virtual_move(v, r, s, _entropy_args);
        }

        group_t get_b_min(size_t v)
        {
            return _b_min[v];
        }

        group_t get_b_max(size_t v)
        {
            return _b_max[v];
        }

        template <class RNG>
        group_t sample_group(size_t v, bool allow_random, bool,
                             bool init_heuristic, RNG& rng)
        {
            return _state.sample_group(v, rng);
        }

        double get_move_prob(size_t v, group_t r, group_t s, bool allow_random,
                             bool, bool reverse)
        {
            return _state.get_move_lprob(v, r, s, reverse);
        }

        double entropy()
        {
            return _state.entropy(_entropy_args);
        }
    };

    using gmap_t = idx_map<group_t, idx_set<size_t, true>, false, true, true>;

    template <class T>
    using iset = idx_set<T>;

    template <class T, class V>
    using imap = idx_map<T, V>;

    template <class... Ts>
    class MCMCExampleState:
        public Multilevel<MCMCExampleStateImp<Ts...>,
                          size_t,
                          group_t,
                          iset,
                          imap,
                          iset,
                          imap,
                          gmap_t, false>
    {
    public:
        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCExampleState(ATs&&... as)
           : Multilevel<MCMCExampleStateImp<Ts...>,
                        size_t,
                        group_t,
                        iset,
                        imap,
                        iset,
                        imap,
                        gmap_t, false>(as...)
        {}
    };
};

} // graph_tool namespace

#endif //EXAMPLE_MULTILEVEL_MCMC_HH

Add a link here to your solution as a gitlab repository.