tiyd-python-2015-01 · DanielKN · Feb 11, 2015
diff --git a/Spam Classification.ipynb b/Spam Classification.ipynb
@@ -0,0 +1,150 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:bd2eaf7eb0bd39236257fcccb3eac699669f9a12530cdc65325c8a1f7dfaede9"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pandas as pd"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "spam_base = pd.read_csv(\"spambase.data\", header=None)  # file has no header row; the default would consume record 0 as column names"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "spam_target = spam_base.pop(spam_base.columns[-1])  # class label is the last column; pop by position, not by an accidental name"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 18
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Hold out 40% of the rows, fit a depth-limited tree, report test metrics, then run 5-fold CV.\n",
+      "try:  # scikit-learn >= 0.18 provides these helpers in model_selection\n",
+      "    from sklearn.model_selection import cross_val_score, train_test_split\n",
+      "except ImportError:  # older releases only ship sklearn.cross_validation\n",
+      "    from sklearn.cross_validation import cross_val_score, train_test_split\n",
+      "from sklearn import metrics\n",
+      "from sklearn.tree import DecisionTreeClassifier\n",
+      "X_train, X_test, y_train, y_test = train_test_split(spam_base, spam_target,\n",
+      "                                                    test_size=0.4, random_state=0)\n",
+      "classifier = DecisionTreeClassifier(max_depth=9, random_state=0)  # seeded so re-runs build the same tree\n",
+      "classifier = classifier.fit(X_train, y_train)\n",
+      "predicted = classifier.predict(X_test)\n",
+      "print(metrics.classification_report(y_test, predicted))\n",
+      "print(metrics.confusion_matrix(y_test, predicted))\n",
+      "print(metrics.f1_score(y_test, predicted))\n",
+      "scores = cross_val_score(classifier, spam_base, spam_target, cv=5); print(\"CV accuracy: %.3f +/- %.3f\" % (scores.mean(), scores.std()))  # scores were never shown before"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "             precision    recall  f1-score   support\n",
+        "\n",
+        "          0       0.92      0.94      0.93      1097\n",
+        "          1       0.91      0.88      0.89       743\n",
+        "\n",
+        "avg / total       0.92      0.92      0.92      1840\n",
+        "\n",
+        "[[1030   67]\n",
+        " [  89  654]]\n",
+        "0.893442622951\n"
+       ]
+      }
+     ],
+     "prompt_number": 29
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "spam_target  # show the label Series; IPython echoes the cell's last expression"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 23,
+       "text": [
+        "0     1\n",
+        "1     1\n",
+        "2     1\n",
+        "3     1\n",
+        "4     1\n",
+        "5     1\n",
+        "6     1\n",
+        "7     1\n",
+        "8     1\n",
+        "9     1\n",
+        "10    1\n",
+        "11    1\n",
+        "12    1\n",
+        "13    1\n",
+        "14    1\n",
+        "...\n",
+        "4585    0\n",
+        "4586    0\n",
+        "4587    0\n",
+        "4588    0\n",
+        "4589    0\n",
+        "4590    0\n",
+        "4591    0\n",
+        "4592    0\n",
+        "4593    0\n",
+        "4594    0\n",
+        "4595    0\n",
+        "4596    0\n",
+        "4597    0\n",
+        "4598    0\n",
+        "4599    0\n",
+        "Name: 1, Length: 4600, dtype: int64"
+       ]
+      }
+     ],
+     "prompt_number": 23
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}