diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..a78b9c7 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/README.md b/README.md index 4da9732..9e3482e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,10 @@ # Machine-Learning-with-Python ![GitHub stars](https://img.shields.io/github/stars/devAmoghS/Machine-Learning-with-Python?style=for-the-badge) ![GitHub forks](https://img.shields.io/github/forks/devAmoghS/Machine-Learning-with-Python?label=Forks&style=for-the-badge) + +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=devAmoghS/Machine-Learning-with-Python&type=Date)](https://star-history.com/#devAmoghS/Machine-Learning-with-Python&Date) + + ![alt text](https://media.istockphoto.com/vectors/machine-learning-3-step-infographic-artificial-intelligence-machine-vector-id962219860?k=6&m=962219860&s=612x612&w=0&h=yricYyUqZbILMHp3IvtenS3xbRDhu1w1u5kk2az5tbo=) ## Small scale machine learning projects to understand the core concepts (order: oldest to newest) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..034e848 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,21 @@ +# Security Policy + +## Supported Versions + +Use this section to tell people about which versions of your project are +currently being supported with security updates. 
+ +| Version | Supported | +| ------- | ------------------ | +| 5.1.x | :white_check_mark: | +| 5.0.x | :x: | +| 4.0.x | :white_check_mark: | +| < 4.0 | :x: | + +## Reporting a Vulnerability + +Use this section to tell people how to report a vulnerability. + +Tell them where to go, how often they can expect to get an update on a +reported vulnerability, what to expect if the vulnerability is accepted or +declined, etc. diff --git a/interview_prep.md b/interview_prep.md index 141d40a..b9f25af 100644 --- a/interview_prep.md +++ b/interview_prep.md @@ -86,7 +86,19 @@ This means that you automatically know the thickness of 6th book even though you This means that if you have measured (n-1) objects then the nth object has no freedom to vary. Therefore, degree of freedom is only (n-1) and not n. -### 7. What are the assumptions of the normal distribution ? Why is it useful ? +### 7. What are the assumptions of the linear regression model ? Why is it useful ? +We can divide the basic assumptions of linear regression into two categories based on whether the assumptions are about the explanatory variables (i.e. features) or the residuals. + +#### Assumptions about the explanatory variables (features): +* Linearity +* No multicollinearity + +#### Assumptions about the error terms (residuals): +* Gaussian distribution +* Homoskedasticity +* No autocorrelation +* Zero conditional mean + ### 8. What are the different approches to outlier detection ? How will you handle the outliers? Why is it useful ? ### 9. How you assess OLS regression models ? 
Three statistics are used in Ordinary Least Squares (OLS) regression to evaluate model fit: diff --git a/prec_rec_curve.py b/prec_rec_curve.py new file mode 100644 index 0000000..7bde221 --- /dev/null +++ b/prec_rec_curve.py @@ -0,0 +1,143 @@ +import numpy as np +from sklearn.metrics import confusion_matrix, precision_score, recall_score +import matplotlib.pyplot as plt +import matplotlib.patches as ptch + +# Appendix A - working with single threshold +pred_scores = [0.7, 0.3, 0.5, 0.6, 0.55, 0.9, 0.4, 0.2, 0.4, 0.3] +y_true = ["positive", "negative", "negative", "positive", "positive", "positive", "negative", "positive", "negative", "positive"] + +# To convert the scores into a class label, a threshold is used. +# When the score is equal to or above the threshold, the sample is classified as one class. +# Otherwise, it is classified as the other class. +# Suppose a sample is Positive if its score is above or equal to the threshold. Otherwise, it is Negative. +# The next block of code converts the scores into class labels with a threshold of 0.5. + +threshold = 0.5 + +y_pred = ["positive" if score >= threshold else "negative" for score in pred_scores] +print(y_pred) + +r = np.flip(confusion_matrix(y_true, y_pred)) +print("\n# Confusion Matrix (From Left to Right & Top to Bottom: \nTrue Positive, False Negative, \nFalse Positive, True Negative)") +print(r) + +# Remember that the higher the precision, the more confident the model is when it classifies a sample as Positive. +# Higher the recall, the more positive samples the model correctly classified as Positive. 
def precision_recall_curve(y_true, pred_scores, thresholds,
                           pos_label="positive", neg_label="negative"):
    """Compute precision, recall and F1 for every decision threshold.

    For each threshold, a score greater than or equal to the threshold is
    labelled ``pos_label`` and any other score ``neg_label``; the resulting
    predictions are then scored against ``y_true``.

    NOTE: this function has the same name as
    ``sklearn.metrics.precision_recall_curve`` but a different signature and
    return value; do not import both into the same namespace.

    Args:
        y_true: Ground-truth labels, one per sample.
        pred_scores: Predicted scores, aligned with ``y_true``.
        thresholds: Iterable of score cut-offs to evaluate.
        pos_label: Label treated as the positive class.
        neg_label: Label assigned to samples scoring below the threshold.

    Returns:
        A tuple ``(precisions, recalls, f1_scores)`` of three lists, each
        holding one value per threshold, in the order of ``thresholds``.
    """
    precisions = []
    recalls = []
    f1_scores = []

    for threshold in thresholds:
        y_pred = [pos_label if score >= threshold else neg_label
                  for score in pred_scores]

        precision = precision_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label)
        recall = recall_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label)

        # With no true positives both metrics are 0 and the harmonic mean is
        # 0/0. Report F1 as 0.0 instead of raising or producing NaN, which
        # would also corrupt a later np.argmax over the F1 scores.
        denominator = precision + recall
        f1 = (2 * precision * recall) / denominator if denominator > 0 else 0.0

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

    return precisions, recalls, f1_scores
def intersection_over_union(gt_box, pred_box, *, verbose=True):
    """Compute the Intersection over Union (IoU) of two axis-aligned boxes.

    Each box is a sequence ``[x, y, width, height]`` where ``(x, y)`` is the
    corner with the smallest coordinates (the top-left corner, assuming image
    coordinates with y growing downwards).

    Args:
        gt_box: Ground-truth bounding box.
        pred_box: Predicted bounding box.
        verbose: When true (the default), print the intermediate
            intersection geometry, as the tutorial script expects.

    Returns:
        A tuple ``(iou, intersection, union)``. ``intersection`` is 0 when
        the boxes do not overlap, and ``iou`` is 0.0 when the union has no
        area (both boxes are degenerate).
    """
    inter_box_top_left = [max(gt_box[0], pred_box[0]), max(gt_box[1], pred_box[1])]
    inter_box_bottom_right = [
        min(gt_box[0] + gt_box[2], pred_box[0] + pred_box[2]),
        min(gt_box[1] + gt_box[3], pred_box[1] + pred_box[3]),
    ]

    # Clamp at zero: for disjoint boxes the raw extents are negative, and the
    # product of two negative extents would otherwise look like a valid
    # positive overlap (e.g. an IoU of 1.0 for boxes that never touch).
    inter_box_w = max(0, inter_box_bottom_right[0] - inter_box_top_left[0])
    inter_box_h = max(0, inter_box_bottom_right[1] - inter_box_top_left[1])

    if verbose:
        print("\ninter_box_top_left:", inter_box_top_left)
        print("gt_box:", gt_box)
        print("pred_box:", pred_box)
        print("inter_box_bottom_right:", inter_box_bottom_right)
        print("inter_box_w:", inter_box_w)
        print("inter_box_h:", inter_box_h)

    intersection = inter_box_w * inter_box_h
    union = gt_box[2] * gt_box[3] + pred_box[2] * pred_box[3] - intersection

    # Two zero-area boxes have no union; define their IoU as 0.0 rather than
    # dividing by zero.
    iou = intersection / union if union > 0 else 0.0

    return iou, intersection, union