{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "NNamP65y8eGf"
      },
      "outputs": [],
      "source": [
        "from sklearn import datasets\n",
        "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
        "from sklearn.decomposition import PCA, KernelPCA\n",
        "from sklearn.datasets import make_circles\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.decomposition import NMF\n",
        "from sklearn.decomposition import TruncatedSVD\n",
        "from scipy.sparse import csr_matrix"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fvJfKhFq8hQc",
        "outputId": "acbc4c59-acbd-4ff4-bacb-e54b55e0312f"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Original number of features: 64\n",
            "Reduced number of features: 40\n"
          ]
        }
      ],
      "source": [
        "# Load the digits dataset\n",
        "digits = datasets.load_digits()\n",
        "# Standardize the feature matrix before running PCA\n",
        "features = StandardScaler().fit_transform(digits.data)\n",
        "# Configure PCA to keep as many components as needed to retain 95% of the variance\n",
        "# (n_components=0.95); whiten=True rescales the projected components to unit variance\n",
        "pca = PCA(n_components=0.95, whiten=True)\n",
        "# Fit PCA and project the standardized features\n",
        "pcafeatures = pca.fit_transform(features)\n",
        "# Display results\n",
        "print(\"Original number of features:\", features.shape[1])\n",
        "print(\"Reduced number of features:\", pcafeatures.shape[1])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jyU800Lf8it4",
        "outputId": "0d4c73bf-7d08-48e6-a44f-a5647a2e0c11"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Original number of features: 2\n",
            "Reduced number of features: 1\n"
          ]
        }
      ],
      "source": [
        "# Create linearly inseparable data (NOTE: this rebinds `features` from the PCA cell)\n",
        "features, _ = make_circles(n_samples=2000, random_state=1, noise=0.1, factor=0.1)\n",
        "# Apply kernel PCA with a radial basis function (RBF) kernel, keeping one component\n",
        "k_pca = KernelPCA(kernel=\"rbf\", gamma=16, n_components=1)\n",
        "k_pcaf = k_pca.fit_transform(features)\n",
        "# Display results\n",
        "print(\"Original number of features:\", features.shape[1])\n",
        "print(\"Reduced number of features:\", k_pcaf.shape[1])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IfCo5TA28kn6",
        "outputId": "312956a9-9fb5-4296-d766-a3e642649da1"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "number of features(original): 4\n",
            "number of features that was reduced: 1\n"
          ]
        }
      ],
      "source": [
        "# Load the Iris flower dataset and pull out the feature matrix and class labels\n",
        "iris = datasets.load_iris()\n",
        "features, target = iris.data, iris.target\n",
        "# Build a one-component LDA, then fit it and project the features in a single call\n",
        "lda = LinearDiscriminantAnalysis(n_components=1)\n",
        "features_lda = lda.fit_transform(features, target)\n",
        "# Report the number of features before and after the projection\n",
        "print(\"number of features(original):\", features.shape[1])\n",
        "print(\"number of features that was reduced:\", features_lda.shape[1])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "yjQBlMtM8mQu",
        "outputId": "800279fb-f44b-43e8-9210-a35b8e190fc7"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "array([0.9912126])"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Explained variance ratio of each retained LDA component (one here, since n_components=1)\n",
        "lda.explained_variance_ratio_"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tHOWTxn18nf7",
        "outputId": "ae3c857a-0ca8-4508-affc-b5ea4dff6788"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Load the Iris flower dataset (rebinds `features` and `target` for this cell)\n",
        "iris123 = datasets.load_iris()\n",
        "features = iris123.data\n",
        "target = iris123.target\n",
        "# Create and fit LDA; n_components=None leaves the component count at the library default\n",
        "lda_r = LinearDiscriminantAnalysis(n_components=None)\n",
        "# NOTE(review): this stores the return value of fit(), not transformed features —\n",
        "# presumably the fitted estimator itself; confirm before reusing `features_lda`\n",
        "features_lda = lda_r.fit(features, target)\n",
        "# Array of explained variance ratios, one entry per component\n",
        "lda_var_r = lda_r.explained_variance_ratio_\n",
        "# Function creation\n",
        "def select_n_c(v_ratio, g_var: float) -> int:\n",
        "    \"\"\"Count the leading components needed to reach a target explained variance.\n",
        "\n",
        "    Args:\n",
        "        v_ratio: Iterable of per-component explained variance ratios, accumulated\n",
        "            in the order given.\n",
        "        g_var: Cumulative explained variance to reach.\n",
        "\n",
        "    Returns:\n",
        "        The number of components consumed when the running sum first reaches\n",
        "        ``g_var``; if it never does, the total number of entries in ``v_ratio``\n",
        "        (0 when ``v_ratio`` is empty).\n",
        "    \"\"\"\n",
        "    running_sum = 0.0\n",
        "    used = 0\n",
        "    for used, ratio in enumerate(v_ratio, start=1):\n",
        "        running_sum += ratio\n",
        "        if running_sum >= g_var:\n",
        "            # Target reached: no need to look at further components\n",
        "            return used\n",
        "    # Target never reached: every available component was consumed\n",
        "    return used\n",
        "\n",
        "# Number of LDA components needed to reach a cumulative explained variance of 0.95\n",
        "select_n_c(lda_var_r, 0.95)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "12zwY1Du8o6i",
        "outputId": "e9178fdf-2195-41cc-f4c3-a1e52c030df5"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:294: FutureWarning: The 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).\n",
            "  FutureWarning,\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Original number of features: 64\n",
            "Reduced number of features: 12\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:1641: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.\n",
            "  ConvergenceWarning,\n"
          ]
        }
      ],
      "source": [
        "# Load the digits dataset\n",
        "digit = datasets.load_digits()\n",
        "# Use the raw feature matrix (no standardization: NMF needs non-negative input)\n",
        "feature_m = digit.data\n",
        "# Create, fit and apply NMF.\n",
        "# init is pinned to \"nndsvd\" (the default this notebook was recorded with) because the\n",
        "# default initializer changes in scikit-learn 1.1. max_iter is raised above the default\n",
        "# of 200, which the recorded run reported as too few iterations to converge; increase it\n",
        "# further if a ConvergenceWarning still appears.\n",
        "n_mf = NMF(n_components=12, init=\"nndsvd\", max_iter=1000, random_state=1)\n",
        "features_nmf = n_mf.fit_transform(feature_m)\n",
        "# Show results\n",
        "print(\"Original number of features:\", feature_m.shape[1])\n",
        "print(\"Reduced number of features:\", features_nmf.shape[1])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wrEYF9Ql8qtU",
        "outputId": "c28d28be-4f0b-4bd7-bb56-fde6ead38a45"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Original number of features: 64\n",
            "Reduced number of features: 12\n"
          ]
        }
      ],
      "source": [
        "# Load the digits dataset\n",
        "digit123 = datasets.load_digits()\n",
        "# Standardize the feature matrix\n",
        "features_m = StandardScaler().fit_transform(digit123.data)\n",
        "# Store the standardized features as a CSR sparse matrix\n",
        "f_sparse = csr_matrix(features_m)\n",
        "# Create the TSVD. random_state is fixed because the default solver is randomized,\n",
        "# so an unseeded run is not guaranteed to reproduce the same components on re-run\n",
        "tsvd = TruncatedSVD(n_components=12, random_state=1)\n",
        "# Fit the TSVD on the sparse matrix and project it\n",
        "features_sp_tsvd = tsvd.fit(f_sparse).transform(f_sparse)\n",
        "# Show results\n",
        "print(\"Original number of features:\", f_sparse.shape[1])\n",
        "print(\"Reduced number of features:\", features_sp_tsvd.shape[1])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xRQ_nUf_8sZA",
        "outputId": "19b8d99c-b330-406d-e728-407c18d82f20"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "0.3003938539283667"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Combined explained variance ratio of the three leading TSVD components\n",
        "tsvd.explained_variance_ratio_[:3].sum()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "zbExVkXp8vpi"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "name": "DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb",
      "provenance": []
    },
    "interpreter": {
      "hash": "f89a88aed07bbcd763ac68893150ace71e487877d8c6527a76855322f20001c6"
    },
    "kernelspec": {
      "display_name": "Python 3.9.12 64-bit",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.9.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}