diff --git a/pgmpy.ipynb b/pgmpy.ipynb index 1325805..5813f0d 100644 --- a/pgmpy.ipynb +++ b/pgmpy.ipynb @@ -1,701 +1,3050 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 78, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from pgmpy.factors import TabularCPD\n", - "from pgmpy.models import BayesianModel" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setting up your model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### First, set the structure" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "olympic_model = BayesianModel([('Genetics', 'OlympicTrials'),\n", - " ('Practice', 'OlympicTrials'),\n", - " ('OlympicTrials', 'Offer')])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Then set up the relationships (the CPDs)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "genetics_cpd = TabularCPD(\n", - " variable = 'Genetics',\n", - " variable_card = 2,\n", - " values = [[.2,.8]])" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "practice_cpd = TabularCPD(\n", - " variable = 'Practice',\n", - " variable_card = 2,\n", - " values = [[.7,.3]])" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "offer_cpd = TabularCPD(\n", - " variable = 'Offer',\n", - " variable_card = 2,\n", - " values = [[.95, .8, .5],\n", - " [.05, .2, .5]],\n", - " evidence = ['OlympicTrials'],\n", - " evidence_card = 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "olympic_trials_cpd = TabularCPD(\n", - " variable = 'OlympicTrials', \n", - " variable_card = 3,\n", - " values = [[.5, .8, .8, .9],\n", - " [.3, .15, .1, .08],\n", - " [.2, .05, .1, .02]],\n", - " evidence = ['Genetics', 'Practice'],\n", - " evidence_card = [2,2])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add the relationships to your models" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Replacing existing CPD for Genetics\n", - "WARNING:root:Replacing existing CPD for Practice\n", - "WARNING:root:Replacing existing CPD for Offer\n", - "WARNING:root:Replacing existing CPD for OlympicTrials\n" - ] - } - ], - "source": [ - "olympic_model.add_cpds (genetics_cpd, practice_cpd, offer_cpd, olympic_trials_cpd)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Examine the structure of your graph" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.get_cpds()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find active trail nodes" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Genetics', 'Offer', 'OlympicTrials'}" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.active_trail_nodes('Genetics')" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Genetics', 'Offer', 'OlympicTrials', 'Practice'}" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.active_trail_nodes('OlympicTrials')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find local independencies" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(Genetics _|_ Practice)" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.local_independencies('Genetics')" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.local_independencies('OlympicTrials')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get all independencies" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(Genetics _|_ Practice)\n", - "(Genetics _|_ Offer | OlympicTrials)\n", - "(Genetics _|_ Offer | Practice, OlympicTrials)\n", - "(Practice _|_ Genetics)\n", - "(Practice _|_ Offer | OlympicTrials)\n", - "(Practice _|_ Offer | Genetics, OlympicTrials)\n", - "(Offer _|_ Genetics, Practice | OlympicTrials)\n", - "(Offer _|_ Practice | Genetics, OlympicTrials)\n", - "(Offer _|_ Genetics | Practice, OlympicTrials)" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_model.get_independencies()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Making inferences" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We can get probability distributions that are not explicitly spelled out in our graphs" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from pgmpy.inference import VariableElimination" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "olympic_infer = VariableElimination(olympic_model)" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒═════════╤══════════════╕\n", - "│ Offer │ phi(Offer) │\n", - "╞═════════╪══════════════╡\n", - "│ Offer_0 │ 0.8898 │\n", - "├─────────┼──────────────┤\n", - "│ Offer_1 │ 0.1102 │\n", - "╘═════════╧══════════════╛\n" - ] - } - ], - "source": [ - "prob_offer = olympic_infer.query(variables = ['Offer'])\n", - "print(prob_offer['Offer'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We can also get conditional probability distributions that take into account what we already know" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "prob_offer_good_genes = olympic_infer.query(\n", - " variables = ['Offer', 'Genetics'])\n", - "print(prob_offer_good_genes['Genetics'])\n", - "print(prob_offer_good_genes['Offer'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "prob_offer_good_genes = olympic_infer.query(\n", - " variables = ['Offer', 'OlympicTrials'])\n", - "print(prob_offer_good_genes['OlympicTrials'])\n", - "print(prob_offer_good_genes['Offer'])" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒═════════╤══════════════╕\n", - "│ Offer │ phi(Offer) │\n", - "╞═════════╪══════════════╡\n", - "│ Offer_0 │ 0.9017 │\n", - "├─────────┼──────────────┤\n", - "│ Offer_1 │ 0.0983 │\n", - "╘═════════╧══════════════╛\n" - ] - } - ], - "source": [ - "prob_offer_bad_genes = olympic_infer.query(\n", - " variables = ['Offer'], \n", - " evidence = {'Genetics':1})\n", - "print(prob_offer_bad_genes['Offer'])" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒═════════╤══════════════╕\n", - "│ Offer │ phi(Offer) │\n", - "╞═════════╪══════════════╡\n", - "│ Offer_0 │ 0.8420 │\n", - "├─────────┼──────────────┤\n", - "│ Offer_1 │ 0.1580 │\n", - "╘═════════╧══════════════╛\n" - ] - } - ], - "source": [ - "prob_offer_good_genes = olympic_infer.query(\n", - " variables = ['Offer'], \n", - " evidence = {'Genetics':0})\n", - "print(prob_offer_good_genes['Offer'])" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒═════════╤══════════════╕\n", - "│ Offer │ phi(Offer) │\n", - "╞═════════╪══════════════╡\n", - "│ Offer_0 │ 0.8150 │\n", - "├─────────┼──────────────┤\n", - "│ Offer_1 │ 0.1850 │\n", - "╘═════════╧══════════════╛\n" - ] - } - ], - "source": [ - "prob_offer_good_genes_did_practice = olympic_infer.query(\n", - " variables = ['Offer'], \n", - " evidence = {'Genetics':0, 'Practice':0})\n", - "print(prob_offer_good_genes_did_practice['Offer'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### You can also go upstream logically. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics." - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒════════════╤═════════════════╕\n", - "│ Genetics │ phi(Genetics) │\n", - "╞════════════╪═════════════════╡\n", - "│ Genetics_0 │ 0.3377 │\n", - "├────────────┼─────────────────┤\n", - "│ Genetics_1 │ 0.6623 │\n", - "╘════════════╧═════════════════╛\n" - ] - } - ], - "source": [ - "prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(\n", - " variables = ['Genetics'], \n", - " evidence = {'OlympicTrials':2})\n", - "print(prob_good_genes_if_amazing_olympic_trials['Genetics'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Some variables are only informative about other variables given 'third' variables" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒════════════╤═════════════════╕\n", - "│ Genetics │ phi(Genetics) │\n", - "╞════════════╪═════════════════╡\n", - "│ Genetics_0 │ 0.2000 │\n", - "├────────────┼─────────────────┤\n", - "│ Genetics_1 │ 0.8000 │\n", - "╘════════════╧═════════════════╛\n" - ] - } - ], - "source": [ - "# Practice does not inherently tell us something about Genetics\n", - "prob_good_genes_if_no_practice = olympic_infer.query(\n", - " variables = ['Genetics'], \n", - " evidence = {'Practice':1})\n", - "print(prob_good_genes_if_no_practice['Genetics'])" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "╒════════════╤═════════════════╕\n", - "│ Genetics │ phi(Genetics) │\n", - "╞════════════╪═════════════════╡\n", - "│ Genetics_0 │ 0.3846 │\n", - "├────────────┼─────────────────┤\n", - "│ Genetics_1 │ 0.6154 │\n", - "╘════════════╧═════════════════╛\n" - ] - } - ], - "source": [ - "# BUT Practice does tell us something about genetics IF we also \n", - "# know something about olympic trials performance\n", - "prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(\n", - " variables = ['Genetics'], \n", - " evidence = {'Practice':1,\n", - " 'OlympicTrials':2})\n", - "print(prob_good_genes_if_no_practice_good_olympic_trials['Genetics'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We can find out the most probable state for a variable" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Genetics': 1}" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "olympic_infer.map_query(variables = ['Genetics'])" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Offer': 0}" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" + "cells": [ + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "id": "ItQU7eI-iz-V" + }, + "outputs": [], + "source": [ + "#https://www.youtube.com/watch?v=DEHqIxX1Kq4\n", + "#https://github.com/AileenNielsen/PyGotham_2016_Probabilistic_Graphical_Models\n", + "\n", + "from pgmpy.factors.discrete import TabularCPD\n", + "from pgmpy.models import BayesianNetwork" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x9SjxHlIiz-W" + }, + "source": [ + "# Setting up your model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "unyPS92siz-X" + }, + "source": [ + "### First, set the structure" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "id": "0yUdLCcriz-X" + }, + "outputs": [], + "source": [ + "olympic_model = BayesianNetwork([('Genetics', 'OlympicTrials'),\n", + " ('Practice', 'OlympicTrials'),\n", + " ('OlympicTrials', 'Offer')])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_qfNItTiz-X" + }, + "source": [ + "### Then set up the relationships (the CPDs)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": true, + "id": "DomgMbORiz-X" + }, + "outputs": [], + "source": [ + "genetics_cpd = TabularCPD(\n", + " variable = 'Genetics',\n", + " variable_card = 2,\n", + " values = [[.2],[.8]])" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": true, + "id": "BtcOYNL6iz-X" + }, + "outputs": [], + "source": [ + "practice_cpd = TabularCPD(\n", + " variable = 'Practice',\n", + " variable_card = 2,\n", + " values = [[.7],[.3]])" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": true, + "id": "ju7g-sVSiz-X" + }, + "outputs": [], + "source": [ + "offer_cpd = TabularCPD(\n", + " variable = 'Offer',\n", + " variable_card = 2,\n", + " values = [[.95, .8, .5],\n", + " [.05, .2, .5]],\n", + " evidence = ['OlympicTrials'],\n", + " evidence_card = [3])" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "id": "DnuoZAlKiz-X" + }, + "outputs": [], + "source": [ + "olympic_trials_cpd = TabularCPD(\n", + " variable = 'OlympicTrials',\n", + " variable_card = 3,\n", + " values = [[.5, .8, .8, .9],\n", + " [.3, .15, .1, .08],\n", + " [.2, .05, .1, .02]],\n", + " evidence = ['Genetics', 'Practice'],\n", + " evidence_card = [2,2])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HKR3eAdIiz-X" + }, + "source": [ + "### Add the relationships to your models" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "id": "Rf1_cPEGiz-Y" + }, + "outputs": [], + "source": [ + "olympic_model.add_cpds (genetics_cpd, practice_cpd, offer_cpd, olympic_trials_cpd)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tHDa2Y8Kiz-Y" + }, + "source": [ + "### Examine the structure of your graph" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wYxgSJf-iz-Y", + "outputId": "b4461e81-adf5-4f1b-979e-3e16aab6383c" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "metadata": {}, + "execution_count": 63 + } + ], + "source": [ + "olympic_model.get_cpds()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_vIBuc8oiz-Y" + }, + "source": [ + "### Find active trail nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3gY-Pb9Miz-Y", + "outputId": "4a526cd8-e75b-4575-e1e0-ecf5d29b33d3" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'Genetics': {'Genetics', 'Offer', 'OlympicTrials'}}" + ] + }, + "metadata": {}, + "execution_count": 64 + } + ], + "source": [ + "olympic_model.active_trail_nodes('Genetics')" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Hb5IWrG7iz-Y", + "outputId": "6e2a37b6-fdcd-4434-f0ef-3253c0cbcba7" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'OlympicTrials': {'Genetics', 'Offer', 'OlympicTrials', 'Practice'}}" + ] + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "olympic_model.active_trail_nodes('OlympicTrials')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "skT3UGyjiz-Y" + }, + "source": [ + "### Find local independencies" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JCJHnNeAiz-Z", + "outputId": "63cf97e1-211a-4583-bf82-c7bf3e234fa5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Genetics ⟂ Practice)" + ] + }, + "metadata": {}, + "execution_count": 66 + } + ], + "source": [ + "olympic_model.local_independencies('Genetics')" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0kkS4-ukiz-Z", + "outputId": "b8d8036e-32fd-4212-eed4-eb46ef5217e8" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [] + }, + "metadata": {}, + "execution_count": 67 + } + ], + "source": [ + "olympic_model.local_independencies('OlympicTrials')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8tKqT8Iriz-Z" + }, + "source": [ + "### Get all independencies" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gBiE1mBTiz-Z", + "outputId": "addf06f9-0c5c-4604-c248-6d9f732276d2" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Practice ⟂ Genetics)\n", + "(Practice ⟂ Offer | OlympicTrials)\n", + "(Practice ⟂ Offer | Genetics, OlympicTrials)\n", + "(Genetics ⟂ Practice)\n", + "(Genetics ⟂ Offer | OlympicTrials)\n", + "(Genetics ⟂ Offer | Practice, OlympicTrials)\n", + "(Offer ⟂ Practice, Genetics | OlympicTrials)\n", + "(Offer ⟂ Genetics | Practice, OlympicTrials)\n", + "(Offer ⟂ Practice | Genetics, OlympicTrials)" + ] + }, + "metadata": {}, + "execution_count": 68 + } + ], + "source": [ + "olympic_model.get_independencies()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hRnqGrS3iz-Z" + }, + "source": [ + "# Making inferences" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GB7nXleHiz-Z" + }, + "source": [ + "### We can get probability distributions that are not explicitly spelled out in our graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "collapsed": true, + "id": "W9A7fjYbiz-Z" + }, + "outputs": [], + "source": [ + "from pgmpy.inference import VariableElimination" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "id": "XWpT12vGiz-Z" + }, + "outputs": [], + "source": [ + "olympic_infer = VariableElimination(olympic_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L1joJRjAiz-Z", + "outputId": "b207a3c1-2382-4702-9b55-0b6984ccdcf7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+--------------+\n", + "| Offer | phi(Offer) |\n", + "+==========+==============+\n", + "| Offer(0) | 0.8898 |\n", + "+----------+--------------+\n", + "| Offer(1) | 0.1102 |\n", + "+----------+--------------+\n" + ] + } + ], + "source": [ + "prob_offer = olympic_infer.query(variables = ['Offer'])\n", + "print(prob_offer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6QjriA7Qiz-Z" + }, + "source": [ + "### We can also get conditional probability distributions that take into account what we already know" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2bT7iyk-iz-Z", + "outputId": "b9e94ab8-6293-4641-f004-877f548e797a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+-------------+-----------------------+\n", + "| Offer | Genetics | phi(Offer,Genetics) |\n", + "+==========+=============+=======================+\n", + "| Offer(0) | Genetics(0) | 0.1684 |\n", + "+----------+-------------+-----------------------+\n", + "| Offer(0) | Genetics(1) | 0.7214 |\n", + "+----------+-------------+-----------------------+\n", + "| Offer(1) | Genetics(0) | 0.0316 |\n", + "+----------+-------------+-----------------------+\n", + "| Offer(1) | Genetics(1) | 0.0786 |\n", + "+----------+-------------+-----------------------+\n" + ] + } + ], + "source": [ + "prob_offer_good_genes = olympic_infer.query(\n", + " variables = ['Offer', 'Genetics'])\n", + "print(prob_offer_good_genes)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tHPVqUyiiz-Z", + "outputId": "66a1fd1d-4b53-433f-a9ad-81726eedfa5e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+------------------+----------------------------+\n", + "| Offer | OlympicTrials | phi(Offer,OlympicTrials) |\n", + "+==========+==================+============================+\n", + "| Offer(0) | OlympicTrials(0) | 0.7429 |\n", + "+----------+------------------+----------------------------+\n", + "| Offer(0) | OlympicTrials(1) | 0.1010 |\n", + "+----------+------------------+----------------------------+\n", + "| Offer(0) | OlympicTrials(2) | 0.0459 |\n", + "+----------+------------------+----------------------------+\n", + "| Offer(1) | OlympicTrials(0) | 0.0391 |\n", + "+----------+------------------+----------------------------+\n", + "| Offer(1) | OlympicTrials(1) | 0.0252 |\n", + "+----------+------------------+----------------------------+\n", + "| Offer(1) | OlympicTrials(2) | 0.0459 |\n", + "+----------+------------------+----------------------------+\n" + ] + } + ], + "source": [ + "prob_offer_good_genes = olympic_infer.query(\n", + " variables = ['Offer', 'OlympicTrials'])\n", + "print(prob_offer_good_genes)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7N1EBxJEiz-Z", + "outputId": "f6b4e167-a98d-41b3-b428-d77941a96c63" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+--------------+\n", + "| Offer | phi(Offer) |\n", + "+==========+==============+\n", + "| Offer(0) | 0.9017 |\n", + "+----------+--------------+\n", + "| Offer(1) | 0.0983 |\n", + "+----------+--------------+\n" + ] + } + ], + "source": [ + "prob_offer_bad_genes = olympic_infer.query(\n", + " variables = ['Offer'],\n", + " evidence = {'Genetics':1})\n", + "print(prob_offer_bad_genes)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FVW53vYliz-Z", + "outputId": "6a1b6300-6450-43ab-cc99-74dcb874e0c3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+--------------+\n", + "| Offer | phi(Offer) |\n", + "+==========+==============+\n", + "| Offer(0) | 0.8420 |\n", + "+----------+--------------+\n", + "| Offer(1) | 0.1580 |\n", + "+----------+--------------+\n" + ] + } + ], + "source": [ + "prob_offer_good_genes = olympic_infer.query(\n", + " variables = ['Offer'],\n", + " evidence = {'Genetics':0})\n", + "print(prob_offer_good_genes)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mNljwcXfiz-Z", + "outputId": "799293f4-8fd6-42f6-ec6d-a9832f85feec" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+--------------+\n", + "| Offer | phi(Offer) |\n", + "+==========+==============+\n", + "| Offer(0) | 0.8150 |\n", + "+----------+--------------+\n", + "| Offer(1) | 0.1850 |\n", + "+----------+--------------+\n" + ] + } + ], + "source": [ + "prob_offer_good_genes_did_practice = olympic_infer.query(\n", + " variables = ['Offer'],\n", + " evidence = {'Genetics':0, 'Practice':0})\n", + "print(prob_offer_good_genes_did_practice)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0RKUqoiyiz-Z" + }, + "source": [ + "### You can also go upstream logically. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uvPvW46Jiz-a", + "outputId": "17578e87-9902-4842-f1f4-66e52545bdda" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------+-----------------+\n", + "| Genetics | phi(Genetics) |\n", + "+=============+=================+\n", + "| Genetics(0) | 0.3377 |\n", + "+-------------+-----------------+\n", + "| Genetics(1) | 0.6623 |\n", + "+-------------+-----------------+\n" + ] + } + ], + "source": [ + "prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(\n", + " variables = ['Genetics'],\n", + " evidence = {'OlympicTrials':2})\n", + "print(prob_good_genes_if_amazing_olympic_trials)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iJYR4lroiz-a" + }, + "source": [ + "### Some variables are only informative about other variables given 'third' variables" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BV4Y9oYCiz-a", + "outputId": "b876fd61-a86f-4178-e26c-cecbe88639d5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------+-----------------+\n", + "| Genetics | phi(Genetics) |\n", + "+=============+=================+\n", + "| Genetics(0) | 0.2000 |\n", + "+-------------+-----------------+\n", + "| Genetics(1) | 0.8000 |\n", + "+-------------+-----------------+\n" + ] + } + ], + "source": [ + "# Practice does not inherently tell us something about Genetics\n", + "prob_good_genes_if_no_practice = olympic_infer.query(\n", + " variables = ['Genetics'],\n", + " evidence = {'Practice':1})\n", + "print(prob_good_genes_if_no_practice)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9PKikc92iz-a", + "outputId": "5f631541-5629-4159-c8b8-a199668080c4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------+-----------------+\n", + "| Genetics | phi(Genetics) |\n", + "+=============+=================+\n", + "| Genetics(0) | 0.3846 |\n", + "+-------------+-----------------+\n", + "| Genetics(1) | 0.6154 |\n", + "+-------------+-----------------+\n" + ] + } + ], + "source": [ + "# BUT Practice does tell us something about genetics IF we also\n", + "# know something about olympic trials performance\n", + "prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(\n", + " variables = ['Genetics'],\n", + " evidence = {'Practice':1,\n", + " 'OlympicTrials':2})\n", + "print(prob_good_genes_if_no_practice_good_olympic_trials)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fjURRD2biz-a" + }, + "source": [ + "### We can find out the most probable state for a variable" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 98, + "referenced_widgets": [ + "98b77b32ccd449e3867670315d6ec8bf", + "156ed51aa28141dabc68ba275367f076", + "64a0afa2590a4df3a22c1ce15b975b8d", + "9fe61d9242f744088021c14a250f12ef", + "93f9d10c6cee4d8e9b38d36975b10e74", + "7f126230c2f84f5785e1f60bd7017c93", + "e6842a1e1a8f449a95fcab9eac654677", + "ca8fb14aaf4140709fde1ba94d49b5d8", + "ea03c55f07fe4ae1b716e2846b3e6137", + "b24922d75a9347af937b696a39c77982", + "01f0f59418aa44c6aad70776a2f9540f", + "03febd7031f14f9ab256c06222867f25", + "dda629137a0946c5a4dc4264d40ab798", + "84d1785ca847435d9d626cc63a777efb", + "012f175c553640b099de9f7a096b2b2c", + "fb8677c66e7a4678b0ba6d16bd5e38d9", + "a49813527eb14c60a3831a1d19aaa4f1", + "df17c24dd67844a599a0f9c34bce3925", + "f7248cc72b3a459da7dec3ad6c9de651", + "210ee404f4114b1995bff65a7b11079b", + "0570f3a5a2ad4a5c9e1bb327eb5ed410", + "945ce9316bcf4c598c9182a4db0f3a07" + ] + }, + "id": "f-jcHD5uiz-a", + "outputId": "512f0265-5995-4b58-83d5-0a4f9d43b3df" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "0it [00:00, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "98b77b32ccd449e3867670315d6ec8bf" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "0it [00:00, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "03febd7031f14f9ab256c06222867f25" + } + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'Genetics': 1}" + ] + }, + "metadata": {}, + "execution_count": 80 + } + ], + "source": [ + "olympic_infer.map_query(variables = ['Genetics'])" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 98, + "referenced_widgets": [ + "adf5c1ab479c46628dc751b5595b3022", + "dd27afbc44404f3fbc9683be17254b80", + "594e4fe82de548c29b1c3f6c1da7ffd6", + "715f3c7828e34cb5a4e23d0830afc5d5", + "2842a7374f644db0af799a8086661639", + "080bf2d942ab4ab7b51d3e2f7890310f", + "8c9326b5b333466da9c592f7c3df3e39", + "e53c4262366345c69630fb787811a16e", + "05401442064f46389878fab0a3d41d90", + "759f54b5938e48c186eb586b14dcd790", + "ff17282c62b943c4b89e118c3965ecaa", + "d43189e0ea484d2393f292da62c99397", + "d406e899847846599761f1161d066293", + "b7e124c95cda469c972950912a137031", + "42d0ee663b224801b960575fde20f90a", + "ac70cc5b687b48df83f2817ea62d9cae", + "32ae3b24a13a4cbfb5aa752adefb84d3", + "49e6bd4c811d4ff4b44b388793993bae", + "70fa6e82271a4f3582c734afea317dc5", + "d500c01514ca452ca61e8bf07eba540d", + "6a247e7f3d4049959e173b4cc8b2f50c", + "c8c2121c5b27415fb27ae6da49dad4f1" + ] + }, + "id": "VkGsVsAYiz-a", + "outputId": "65959787-0215-40b0-d078-7827ff7b70e1" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/3 [00:00