{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# We load the data with load_iris from sklearn\n",
    "from sklearn.datasets import load_iris\n",
    "\n",
    "# `data` is the object returned by load_iris(); the next cell reads its\n",
    "# .data, .feature_names, .target and .target_names fields.\n",
    "data = load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load_iris returns an object with several fields\n",
    "features = data.data\n",
    "feature_names = data.feature_names\n",
    "target = data.target\n",
    "target_names = data.target_names\n",
    "\n",
    "# Scatter feature 0 against feature 1, one colour/marker pair per class.\n",
    "# The position in this table is the integer class id stored in `target`.\n",
    "class_styles = [('r', '>'), ('g', 'o'), ('b', 'x')]\n",
    "for t, (c, marker) in enumerate(class_styles):\n",
    "    in_class = (target == t)\n",
    "    plt.scatter(features[in_class, 0],\n",
    "                features[in_class, 1],\n",
    "                marker=marker,\n",
    "                c=c,\n",
    "                label=target_names[t])\n",
    "\n",
    "# Label the axes and classes from the dataset's own metadata so the\n",
    "# figure can be read without looking at the code\n",
    "plt.xlabel(feature_names[0])\n",
    "plt.ylabel(feature_names[1])\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Maximum of setosa: 1.9.\n",
      "Minimum of others: 3.0.\n"
     ]
    }
   ],
   "source": [
    "# Translate every integer target into its class name (NumPy fancy indexing)\n",
    "labels = target_names[target]\n",
    "\n",
    "# Boolean mask that is True on the setosa rows\n",
    "is_setosa = (labels == 'setosa')\n",
    "\n",
    "# Petal length is stored in column 2 of the feature matrix\n",
    "plength = features[:, 2]\n",
    "\n",
    "# The key comparison: largest setosa value vs. smallest value among the rest\n",
    "setosa_plength = plength[is_setosa]\n",
    "other_plength = plength[~is_setosa]\n",
    "max_setosa = setosa_plength.max()\n",
    "min_non_setosa = other_plength.min()\n",
    "\n",
    "print('Maximum of setosa: {0}.'.format(max_setosa))\n",
    "print('Minimum of others: {0}.'.format(min_non_setosa))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Keep only the non-setosa examples.\n",
    "# Both arrays are rebuilt from the untouched originals (`data.data` and\n",
    "# `target_names[target]`) instead of from their own previous values, so\n",
    "# running this cell again yields the same result rather than failing on a\n",
    "# mask/array length mismatch.\n",
    "# ~ is the boolean negation operator\n",
    "features = data.data[~is_setosa]\n",
    "labels = target_names[target][~is_setosa]\n",
    "\n",
    "# Build a new target variable, is_virginica\n",
    "is_virginica = (labels == 'virginica')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best acc: 0.94, fi: 3, t: 1.6, reverse: False\n"
     ]
    }
   ],
   "source": [
    "# Check which feature + threshold results in best classification\n",
    "# accuracy\n",
    "\n",
    "# Initialize best_acc to impossibly low value. The other best_* names\n",
    "# start as None so the final print still works if no candidate is tested.\n",
    "best_acc = -1.0\n",
    "best_fi = None\n",
    "best_t = None\n",
    "best_reverse = None\n",
    "\n",
    "for fi in range(features.shape[1]):\n",
    "    # Get the vector for feature `fi` once per feature; its own values\n",
    "    # are also the candidate thresholds we are going to test\n",
    "    feature_i = features[:, fi]\n",
    "    for t in feature_i:\n",
    "        # apply threshold `t`\n",
    "        pred = (feature_i > t)\n",
    "        acc = (pred == is_virginica).mean()\n",
    "        rev_acc = (pred == ~is_virginica).mean()\n",
    "\n",
    "        # Keep whichever orientation of the rule scores higher\n",
    "        if rev_acc > acc:\n",
    "            reverse = True\n",
    "            acc = rev_acc\n",
    "        else:\n",
    "            reverse = False\n",
    "\n",
    "        # Strict comparison: on ties the first candidate found is kept\n",
    "        if acc > best_acc:\n",
    "            best_acc = acc\n",
    "            best_fi = fi\n",
    "            best_t = t\n",
    "            best_reverse = reverse\n",
    "\n",
    "print(\"Best acc: %s, fi: %s, t: %s, reverse: %s\"\n",
    "      % (best_acc, best_fi, best_t, best_reverse))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "# And now we have our classification method\n",
    "def is_virginica_test(fi, t, reverse, example):\n",
    "    \"\"\"Apply threshold model to a new example.\n",
    "\n",
    "    Returns whether ``example[fi]`` is strictly greater than ``t``; the\n",
    "    answer is logically negated when ``reverse`` is true.\n",
    "    \"\"\"\n",
    "    above_threshold = example[fi] > t\n",
    "    return (not above_threshold) if reverse else above_threshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}