From 3515e58c2bd97567d77f9b1661df4e9faab1bca9 Mon Sep 17 00:00:00 2001 From: Wajdi <62748386+wajdi404@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:11:19 +0100 Subject: [PATCH] Created using Colab --- TinyML.ipynb | 1552 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1552 insertions(+) create mode 100644 TinyML.ipynb diff --git a/TinyML.ipynb b/TinyML.ipynb new file mode 100644 index 0000000..a982421 --- /dev/null +++ b/TinyML.ipynb @@ -0,0 +1,1552 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "import zipfile\n", + "import os\n", + "\n", + "# URL of the dataset\n", + "url = 'https://zenodo.org/records/4686379/files/iotj_dataset_zenodo.zip?download=1'\n", + "\n", + "# Define the local filename to save the dataset\n", + "local_filename = 'iotj_dataset_zenodo.zip'\n", + "\n", + "# Download the file from the URL\n", + "response = requests.get(url, stream=True)\n", + "\n", + "# Save the file locally\n", + "with open(local_filename, 'wb') as file:\n", + " for chunk in response.iter_content(chunk_size=128):\n", + " file.write(chunk)\n", + "\n", + "print(f\"Dataset downloaded and saved as {local_filename}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qoAsvUYWAJcF", + "outputId": "eafa67f1-50e7-46d1-d046-ed78780a1904" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Dataset downloaded and saved as iotj_dataset_zenodo.zip\n", + "Dataset extracted to 'iotj_dataset' folder\n" + ] + }, + { + "output_type": "execute_result", + "data": 
{ + "text/plain": [ + "['iotj_dataset_zenodo']" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "local_filename = 'iotj_dataset_zenodo.zip'\n", + "# Unzip the downloaded file\n", + "with zipfile.ZipFile(local_filename, 'r') as zip_ref:\n", + " zip_ref.extractall(\"iotj_dataset\")\n", + " print(\"Dataset extracted to 'iotj_dataset' folder\")" + ], + "metadata": { + "id": "VPk70X2HAqdG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Check the contents of the folder\n", + "os.listdir(\"/content/iotj_dataset/iotj_dataset_zenodo\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "roB6oNFBAoTI", + "outputId": "e90c9613-8165-4d95-c00f-b6621bb976af" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['photos', 'localization', 'ranging', 'readme.md']" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def print_directory_tree(root_dir, indent=\"\"):\n", + " # Print the current directory\n", + " print(f\"{indent}|-- {os.path.basename(root_dir)}/\")\n", + "\n", + " # Get the list of directories in the current directory\n", + " items = os.listdir(root_dir)\n", + " dirs = [item for item in items if os.path.isdir(os.path.join(root_dir, item))]\n", + "\n", + " # Recursively print the directories\n", + " for d in dirs:\n", + " print_directory_tree(os.path.join(root_dir, d), indent + \" \")\n", + "\n", + "\n", + "# Print the directory structure for the \"iotj_dataset\"\n", + "print_directory_tree(\"/content/iotj_dataset/iotj_dataset_zenodo/ranging\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mpQjKUEeBt3D", + "outputId": "294f80b2-e19d-4202-8054-4ed90b016844" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "|-- 
ranging/\n", + " |-- code/\n", + " |-- data/\n", + " |-- 3db/\n", + " |-- 3db_rec_16/\n", + " |-- 3db_rec_3/\n", + " |-- backup/\n", + " |-- 3db_rec_11/\n", + " |-- 3db_rec_1/\n", + " |-- 3db_rec_6/\n", + " |-- 3db_rec_9/\n", + " |-- 3db_rec_4/\n", + " |-- 3db_rec_13/\n", + " |-- 3db_rec_12/\n", + " |-- 3db_rec_17/\n", + " |-- 3db_rec_10/\n", + " |-- 3db_rec_15/\n", + " |-- 3db_rec_7/\n", + " |-- 3db_rec_5/\n", + " |-- backup/\n", + " |-- 3db_rec_16/\n", + " |-- 3db_rec_3/\n", + " |-- backup/\n", + " |-- 3db_rec_11/\n", + " |-- 3db_rec_1/\n", + " |-- 3db_rec_6/\n", + " |-- 3db_rec_19/\n", + " |-- 3db_rec_9/\n", + " |-- 3db_rec_4/\n", + " |-- 3db_rec_13/\n", + " |-- 3db_rec_12/\n", + " |-- 3db_rec_17/\n", + " |-- 3db_rec_10/\n", + " |-- 3db_rec_15/\n", + " |-- 3db_rec_7/\n", + " |-- 3db_rec_5/\n", + " |-- 3db_rec_18/\n", + " |-- 3db_rec_2/\n", + " |-- 3db_rec_14/\n", + " |-- 3db_rec_8/\n", + " |-- 3db_rec_18/\n", + " |-- 3db_rec_2/\n", + " |-- 3db_rec_14/\n", + " |-- 3db_rec_8/\n", + " |-- decawave/\n", + " |-- dw_rec_3/\n", + " |-- dw_rec_4/\n", + " |-- dw_rec_1/\n", + " |-- dw_rec_2/\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Load the CSV files\n", + "\n", + "\n", + "dw_rec_1_pd = pd.read_csv( \"/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_1/data.csv\")\n", + "dw_rec_2_pd = pd.read_csv( \"/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_2/data.csv\")\n", + "dw_rec_3_pd = pd.read_csv( \"/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_3/data.csv\")\n", + "dw_rec_4_pd = pd.read_csv( \"/content/iotj_dataset/iotj_dataset_zenodo/ranging/data/decawave/dw_rec_4/data.csv\")\n", + "\n", + "# Extract 'true_dist' and 'measured_dist' columns from each DataFrame\n", + "dw_rec_1_subset = dw_rec_1_pd[['true_dist', 'measured_dist']]\n", + "dw_rec_2_subset = dw_rec_2_pd[['true_dist', 
'measured_dist']]\n", + "dw_rec_3_subset = dw_rec_3_pd[['true_dist', 'measured_dist']]\n", + "dw_rec_4_subset = dw_rec_4_pd[['true_dist', 'measured_dist']]\n", + "\n", + "# Concatenate the extracted columns from all four files into one DataFrame\n", + "combined_df = pd.concat([dw_rec_1_subset, dw_rec_2_subset, dw_rec_3_subset, dw_rec_4_subset], ignore_index=True)" + ], + "metadata": { + "id": "LYXYa7GIDLVY" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "combined_df" + ], + "metadata": { + "id": "P6dkHksUK1lO", + "outputId": "9addd0f8-253e-4d58-98d7-00e82f0bb822", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + } + }, + "execution_count": 28, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " true_dist measured_dist\n", + "0 1 0.953603\n", + "1 1 1.004472\n", + "2 1 0.943430\n", + "3 1 0.943430\n", + "4 1 0.963777\n", + "... ... ...\n", + "5195 3 3.491634\n", + "5196 3 3.441934\n", + "5197 3 3.511514\n", + "5198 3 3.451874\n", + "5199 3 3.451874\n", + "\n", + "[5200 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
true_distmeasured_dist
010.953603
111.004472
210.943430
310.943430
410.963777
.........
519533.491634
519633.441934
519733.511514
519833.451874
519933.451874
\n", + "

5200 rows × 2 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "combined_df", + "summary": "{\n \"name\": \"combined_df\",\n \"rows\": 5200,\n \"fields\": [\n {\n \"column\": \"true_dist\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 10,\n \"num_unique_values\": 9,\n \"samples\": [\n 8,\n 2,\n 6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"measured_dist\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.3340420864530014,\n \"min\": 0.9027344998789172,\n \"max\": 11.381706220155852,\n \"num_unique_values\": 362,\n \"samples\": [\n 5.653879930334576,\n 3.9853834622516464,\n 1.9099366360997097\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Shuffle the combined DataFrame\n", + "shuffled_df = combined_df.sample(frac=1).reset_index(drop=True)" + ], + "metadata": { + "id": "WWOHw-BbNKj1" + }, + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "shuffled_df" + ], + "metadata": { + "id": "2pPO4jyGNMH4", + "outputId": "2eef0b00-e5cb-440b-b3e3-8613bdb9eac7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + } + }, + "execution_count": 31, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " true_dist measured_dist\n", + "0 3 2.964814\n", + "1 2 1.909937\n", + "2 3 2.964814\n", + "3 2 2.357582\n", + "4 5 4.773893\n", + "... ... ...\n", + "5195 5 5.948919\n", + "5196 2 2.318715\n", + "5197 2 2.378355\n", + "5198 2 2.030455\n", + "5199 5 4.803713\n", + "\n", + "[5200 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
true_distmeasured_dist
032.964814
121.909937
232.964814
322.357582
454.773893
.........
519555.948919
519622.318715
519722.378355
519822.030455
519954.803713
\n", + "

5200 rows × 2 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "shuffled_df", + "summary": "{\n \"name\": \"shuffled_df\",\n \"rows\": 5200,\n \"fields\": [\n {\n \"column\": \"true_dist\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 10,\n \"num_unique_values\": 9,\n \"samples\": [\n 8,\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"measured_dist\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.334042086453001,\n \"min\": 0.9027344998789172,\n \"max\": 11.381706220155852,\n \"num_unique_values\": 362,\n \"samples\": [\n 2.571230967961846,\n 7.983670730279641,\n 4.803712644246911\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### **Random Forest Regressor**" + ], + "metadata": { + "id": "NM0x9icyQL-k" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", + "import joblib\n", + "\n", + "X = shuffled_df[['measured_dist']]\n", + "y = shuffled_df['true_dist']\n", + "\n", + "# Split data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Initialize and train the model\n", + "model = RandomForestRegressor(n_estimators=100, random_state=42)\n", + "model.fit(X_train, y_train)" + ], + "metadata": { + "id": "SIS7M0tpOIoU", + "outputId": "def61a0d-f53c-41ef-8418-25aaddf2f943", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81 + } + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestRegressor(random_state=42)" + ], + "text/html": 
[ + "
RandomForestRegressor(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Make predictions and evaluate the model\n", + "y_pred = model.predict(X_test)\n", + "mae = mean_absolute_error(y_test, y_pred)\n", + "mse = mean_squared_error(y_test, y_pred)\n", + "\n", + "# Calculate the mean of the true distances\n", + "mean_true_distance = np.mean(y_test)\n", + "\n", + "# Convert MAE and MSE to percentages\n", + "mae_percentage = (mae / mean_true_distance) * 100\n", + "mse_percentage = (mse / mean_true_distance) * 100\n", + "\n", + "# Print results\n", + "print(f'Mean Absolute Error (as a percentage): {mae_percentage:.2f}%')\n", + "print(f'Mean Squared Error (as a percentage): {mse_percentage:.2f}%')" + ], + "metadata": { + "id": "pt2i6mipOg3n", + "outputId": "0f0116de-f54a-43d2-8d61-914b86e068d7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mean Absolute Error (as a percentage): 0.09%\n", + "Mean Squared Error (as a percentage): 0.06%\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### **Random Forest Regressor**" + ], + "metadata": { + "id": "WwvxZKSxQlCd" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "bxucXB59Ozj_" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file