From d7b7a1ddbc87ba7ffb2b32465e0a3d04b9dd6316 Mon Sep 17 00:00:00 2001
From: Shivani Panchiwala <72301600+panchiwalashivani@users.noreply.github.com>
Date: Tue, 27 Oct 2020 14:40:45 +0530
Subject: [PATCH] Hierarchical Clustering Algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduction to Hierarchical Clustering

Hierarchical clustering is another unsupervised learning algorithm; it groups unlabeled data points that share similar characteristics. Hierarchical clustering algorithms fall into two categories.

Agglomerative hierarchical algorithms − Each data point starts as its own cluster, and the closest pairs of clusters are merged successively (bottom-up approach). The resulting hierarchy is represented as a dendrogram, or tree structure.

Divisive hierarchical algorithms − All data points start in one big cluster, which is split successively (top-down approach) into smaller clusters.

Steps to Perform Agglomerative Hierarchical Clustering

We will use the most common and important variant, agglomerative clustering. The steps are as follows −

Step 1 − Treat each data point as a single cluster. With K data points we therefore start with K clusters.

Step 2 − Merge the two closest data points into one cluster. This leaves K-1 clusters.

Step 3 − Merge the two closest clusters. This leaves K-2 clusters.

Step 4 − Repeat the previous step until only one big cluster remains, i.e. there are no more clusters left to merge.

Step 5 − Once the single big cluster has been formed, cut the dendrogram at a suitable level to obtain the number of clusters the problem requires.
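As an aside (not part of the committed notebooks), the merging procedure above can be sketched in a few lines of plain NumPy. This is a minimal single-linkage illustration; the function name and the toy array are made up for the example, and the notebooks below use scipy/scikit-learn instead.

import numpy as np

def agglomerate(points, n_clusters):
    # Step 1: every point starts as its own cluster (lists of row indices).
    clusters = [[i] for i in range(len(points))]
    # Steps 2-4: repeatedly merge the two closest clusters.
    # Step 5 corresponds to stopping once n_clusters remain instead of
    # cutting a full dendrogram afterwards.
    while len(clusters) > n_clusters:
        best = None
        for a in range(len(clusters)):
            for b in range(a + 1, len(clusters)):
                # Single linkage: distance between the closest pair of members.
                d = min(np.linalg.norm(points[i] - points[j])
                        for i in clusters[a] for j in clusters[b])
                if best is None or d < best[0]:
                    best = (d, a, b)
        _, a, b = best
        clusters[a] += clusters[b]   # merge cluster b into cluster a ...
        del clusters[b]              # ... and drop b
    return clusters

# Toy run: two obvious groups of points.
print(agglomerate(np.array([[7, 8], [12, 20], [87, 75], [73, 85]]), 2))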
---
 ...rchical Clustering Algorithm example1.html | 13281 +++++++++++++++
 ...rchical Clustering Algorithm example2.html | 13324 ++++++++++++++++
 2 files changed, 26605 insertions(+)
 create mode 100644 Clustering/Hierarchical Clustering Algorithm example1.html
 create mode 100644 Clustering/Hierarchical Clustering Algorithm example2.html

diff --git a/Clustering/Hierarchical Clustering Algorithm example1.html b/Clustering/Hierarchical Clustering Algorithm example1.html
new file mode 100644
index 0000000..9b6efdd
--- /dev/null
+++ b/Clustering/Hierarchical Clustering Algorithm example1.html
@@ -0,0 +1,13281 @@

Hierarchical Clustering Algorithm example1

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Ten 2-D points; plot them with their index (1-10) as an annotation.
X = np.array([[7, 8], [12, 20], [17, 19], [26, 15], [32, 37],
              [87, 75], [73, 85], [62, 80], [73, 60], [87, 96]])
labels = range(1, 11)
plt.figure(figsize=(10, 7))
plt.subplots_adjust(bottom=0.1)
plt.scatter(X[:, 0], X[:, 1], label='True Position')
for label, x, y in zip(labels, X[:, 0], X[:, 1]):
    plt.annotate(label, xy=(x, y), xytext=(-3, 3),
                 textcoords='offset points', ha='right', va='bottom')
plt.show()
[Output: scatter plot of the ten points, each annotated with its label 1-10]

In [3]:
# Build a single-linkage hierarchy over X and draw it as a dendrogram.
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt

linked = linkage(X, 'single')
labelList = range(1, 11)
plt.figure(figsize=(10, 7))
dendrogram(linked, orientation='top', labels=labelList,
           distance_sort='descending', show_leaf_counts=True)
plt.show()
[Output: dendrogram of the ten points under single linkage]
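A side note, not in the original notebook: the same linkage matrix can also be cut into flat clusters programmatically instead of reading the dendrogram by eye. A small sketch using scipy's fcluster (it reuses the linked variable from the cell above):

from scipy.cluster.hierarchy import fcluster

# Cut the single-linkage tree into (at most) 2 flat clusters;
# labels are 1-based, one per row of X.
flat_labels = fcluster(linked, t=2, criterion='maxclust')
print(flat_labels)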
In [4]:
# Agglomerative clustering (Ward linkage) into two clusters.
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='ward')
cluster.fit_predict(X)
Out[4]:
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype=int32)
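A version note that is not in the original notebook: in recent scikit-learn releases the affinity argument of AgglomerativeClustering has been renamed to metric (deprecated in 1.2 and, to the best of my knowledge, removed in 1.4). If the call above raises an error on a current install, the equivalent is:

# Same model, newer scikit-learn spelling of the distance argument.
cluster = AgglomerativeClustering(n_clusters=2, metric='euclidean', linkage='ward')
cluster.fit_predict(X)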
In [5]:
# Colour each point by its assigned cluster label.
plt.scatter(X[:, 0], X[:, 1], c=cluster.labels_, cmap='rainbow')
Out[5]:
<matplotlib.collections.PathCollection at 0x96db3e8>
[Output: scatter plot of X coloured by the two cluster labels]
diff --git a/Clustering/Hierarchical Clustering Algorithm example2.html b/Clustering/Hierarchical Clustering Algorithm example2.html
new file mode 100644
index 0000000..b37c13e
--- /dev/null
+++ b/Clustering/Hierarchical Clustering Algorithm example2.html
@@ -0,0 +1,13324 @@

Hierarchical Clustering Algorithm example2
In [1]:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import numpy as np
from pandas import read_csv

# Load the Pima Indians Diabetes dataset (768 rows, 9 columns).
path = r"C:\Users\pshiv\Desktop\CSV files\pima-indians-diabetes.csv"
data = read_csv(path)
array = data.values
X = array[:, 0:8]    # eight input features
Y = array[:, 8]      # class label
print(data.shape)    # (768, 9)
data.head()
Out[1]:
   Number_of_Pregrancy  Plasma_glucose  Diastolic_BP  Triceps_Thickness  Serum_Insulin   BMI  Diabetes_Pedegree  Age  Class
0                    6             148            72                 35              0  33.6              0.627   50      1
1                    1              85            66                 29              0  26.6              0.351   31      0
2                    8             183            64                  0              0  23.3              0.672   32      1
3                    1              89            66                 23             94  28.1              0.167   21      0
4                    0             137            40                 35            168  43.1              2.288   33      1
In [2]:
# Select Triceps_Thickness and Serum_Insulin, then draw a Ward-linkage dendrogram.
patient_data = data.iloc[:, 3:5].values
import scipy.cluster.hierarchy as shc

plt.figure(figsize=(10, 7))
plt.title("Patient Dendrograms")
dend = shc.dendrogram(shc.linkage(data, method='ward'))
[Output: Ward-linkage dendrogram of the full data table]
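An editorial observation, not from the original notebook: the dendrogram above is built from all nine columns of data (including the Class label), while the clustering in the next cell uses only the two patient_data columns (Triceps_Thickness and Serum_Insulin). For a like-for-like picture, the dendrogram can be rebuilt from those same two columns:

# Sketch: dendrogram restricted to the two columns used for clustering below.
plt.figure(figsize=(10, 7))
plt.title("Patient Dendrogram (Triceps_Thickness vs Serum_Insulin)")
dend = shc.dendrogram(shc.linkage(patient_data, method='ward'))
plt.show()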
In [3]:
# Cluster the two selected features into four groups and plot the result.
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters=4, affinity='euclidean', linkage='ward')
cluster.fit_predict(patient_data)
plt.figure(figsize=(10, 7))
plt.scatter(patient_data[:, 0], patient_data[:, 1], c=cluster.labels_, cmap='rainbow')
Out[3]:
<matplotlib.collections.PathCollection at 0xa1fa520>
[Output: scatter plot of Triceps_Thickness vs Serum_Insulin coloured by the four cluster labels]
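As an optional sanity check that is not in the original notebook, the choice of four clusters can be scored with a silhouette coefficient (values closer to 1 indicate better-separated clusters):

from sklearn.metrics import silhouette_score

# Mean silhouette over all patients for the 4-cluster assignment above.
print(silhouette_score(patient_data, cluster.labels_))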