{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# All imports live in this first cell so a fresh kernel can run top to bottom.\n",
    "# NumPy is used directly: the NumPy aliases in the root scipy namespace\n",
    "# (scipy.genfromtxt, scipy.polyfit, ...) are deprecated.\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the tab-separated file; fields that cannot be parsed become NaN\n",
    "data = np.genfromtxt(\"./ch01/data/web_traffic.tsv\", delimiter=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1.00000000e+00 2.27200000e+03]\n",
      " [ 2.00000000e+00 nan]\n",
      " [ 3.00000000e+00 1.38600000e+03]\n",
      " ..., \n",
      " [ 7.41000000e+02 5.39200000e+03]\n",
      " [ 7.42000000e+02 5.90600000e+03]\n",
      " [ 7.43000000e+02 4.88100000e+03]]\n"
     ]
    }
   ],
   "source": [
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(743, 2)\n"
     ]
    }
   ],
   "source": [
    "print(data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1.0, 2272.0)\n",
      "(2.0, nan)\n",
      "[ 1.00000000e+00 2.27200000e+03]\n"
     ]
    }
   ],
   "source": [
    "x = data[:,0]  # first column (used as the time axis in the plots below)\n",
    "y = data[:,1]  # second column (plotted below as hits/hour)\n",
    "print(x[0], y[0]) # first entry of each column (row 0)\n",
    "print(x[1], y[1]) # second entry of each column (row 1)\n",
    "print(data[0,:]) # first row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(np.isnan(y)) # count the invalid (NaN) entries in y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Clean both vectors: build a boolean mask of the entries where y is a valid\n",
    "# number and use it to index x and y. The mask is computed once so the result\n",
    "# does not depend on the order in which x and y are filtered.\n",
    "valid = ~np.isnan(y)\n",
    "x = x[valid]\n",
    "y = y[valid]\n",
    "np.sum(np.isnan(y)) # no more invalid entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "# Time for some plotting\n",
    "# plot the (x,y) points with dots of size 10\n",
    "plt.scatter(x, y, s=10)\n",
    "plt.title(\"Web traffic over the last month\")\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Hits/hour\")\n",
    "# one tick per week: x is in hours, so a week is 7*24 units wide\n",
    "plt.xticks([w*7*24 for w in range(10)], ['week %i' % w for w in range(10)])\n",
    "plt.autoscale(tight=True)\n",
    "\n",
    "# draw a light-grey grid with solid lines\n",
    "plt.grid(True, linestyle='-', color='0.75')\n",
    "plt.show()\n",
    "print(\"Done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model Parameters: [ 2.59619213 989.02487106]\n",
      "[ 3.17389767e+08]\n",
      "317389767.34\n"
     ]
    }
   ],
   "source": [
    "# Model error: sum of squared distances between model predictions and real data\n",
    "def error(f, x, y):\n",
    "    \"\"\"Return the sum of squared differences between f(x) and y.\"\"\"\n",
    "    return np.sum((f(x)-y)**2)\n",
    "\n",
    "# Least-squares fit of a degree-1 polynomial; full=True also returns the\n",
    "# fit diagnostics (residuals, rank, singular values, rcond)\n",
    "fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)\n",
    "\n",
    "# Meaning our model is f(x) = 2.59619213 * x + 989.02487106\n",
    "print(\"Model Parameters: %s\" % fp1)\n",
    "print(residuals)\n",
    "\n",
    "# Create a callable model from the model params\n",
    "f1 = np.poly1d(fp1)\n",
    "print(error(f1, x, y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Plot the new model against the data\n",
    "fx = np.linspace(0,x[-1], 1000) # generate X-values for plotting\n",
    "plt.plot(fx, f1(fx), linewidth=4)\n",
    "plt.legend([\"d=%i\" % f1.order], loc=\"upper left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1.05322215e-02 -5.26545650e+00 1.97476082e+03]\n",
      "179983507.878\n"
     ]
    }
   ],
   "source": [
    "# Time for some polynomial modeling: fit a degree-2 polynomial\n",
    "f2p = np.polyfit(x, y, 2)\n",
    "print(f2p)\n",
    "\n",
    "f2 = np.poly1d(f2p)\n",
    "print(error(f2, x, y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Plot the poly model\n",
    "fx = np.linspace(0,x[-1], 1000) # generate X-values for plotting\n",
    "plt.plot(fx, f2(fx), linewidth=4)\n",
    "# label the curve with the degree of the model actually plotted (f2, not f1)\n",
    "plt.legend([\"d=%i\" % f2.order], loc=\"upper left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error inflection=132950348.197616\n"
     ]
    }
   ],
   "source": [
    "# inflection point in hours; converted to int because it is used as a slice\n",
    "# bound below and float slice bounds raise a TypeError\n",
    "inflection = int(3.5*7*24)\n",
    "xa = x[:inflection]\n",
    "ya = y[:inflection]\n",
    "xb = x[inflection:]\n",
    "yb = y[inflection:]\n",
    "\n",
    "# fit a separate straight line to the samples before and after the split\n",
    "fa = np.poly1d(np.polyfit(xa, ya, 1))\n",
    "fb = np.poly1d(np.polyfit(xb, yb, 1))\n",
    "\n",
    "fa_error = error(fa, xa, ya)\n",
    "fb_error = error(fb, xb, yb)\n",
    "\n",
    "print(\"Error inflection=%f\" % (fa_error + fb_error))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}