Skip to content

Commit b2b900e

Browse files
initial commit
1 parent a265668 commit b2b900e

19 files changed

+37238
-0
lines changed

App.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from Classifier import Classififer
2+
import pandas as pd
3+
import numpy as np
4+
from Dataset import Dataset
5+
6+
7+
class App:
    """Train a classifier on labelled CSV data and report flagged client IPs.

    Workflow (see ``run``): fit the classifier on ``data/train.csv``, build a
    feature matrix from the current logs via ``Dataset``, predict a label for
    every row, and print the client IP of every row predicted as ``1``.
    """

    def __init__(self):
        # ``Classififer`` (sic) is the factory class exported by Classifier.py.
        self.classifier = Classififer().get_classifier()

    def train(self):
        """Fit the classifier on ``data/train.csv``.

        The CSV is read without a header row. Every column except the last is
        used as a feature; the last column is used as the label.
        """
        df = pd.read_csv('data/train.csv', header=None)
        data = np.array(df)
        self.x_train = data[:, :-1]
        self.y_train = data[:, -1:]
        # The stored labels keep their (n, 1) column shape, but estimators
        # expect a flat label vector for single-output problems, so flatten
        # only the view handed to ``fit``.
        self.classifier.fit(self.x_train, np.ravel(self.y_train))

    def test(self):
        """Build the feature matrix from the logs and predict a label per row.

        Stores the feature matrix in ``self.x_test`` and the predictions in
        ``self.results``.
        """
        self.ds_obj = Dataset()
        ds = self.ds_obj.read_dataset()
        # Drop the first element of every row (the raw, un-encoded log field);
        # the remaining elements are the numeric features.
        rows = [row[1:] for row in ds]
        self.x_test = np.array(rows)
        if not rows:
            # Nothing to classify; avoid calling predict on an empty matrix.
            self.results = np.array([])
            return
        self.results = self.classifier.predict(self.x_test)

    def post_test(self):
        """Print the client IP of every row whose predicted label is ``1``.

        Each IP is printed once, in order of first appearance. Column 1 of
        ``self.x_test`` holds the encoded client IP, which is decoded back
        through ``self.ds_obj.detransform_client_ip``.
        """
        client_ip_ids = []
        seen = set()  # O(1) membership test; the list keeps first-seen order
        for features, label in zip(self.x_test, self.results):
            if label != 1:
                continue
            client_id = features[1]
            if client_id not in seen:
                seen.add(client_id)
                client_ip_ids.append(client_id)

        if not client_ip_ids:
            return

        dos_ips = self.ds_obj.detransform_client_ip(
            np.array(client_ip_ids, dtype="int64")
        )
        for ip in dos_ips:
            # Call form prints identically on Python 2 and Python 3.
            print(ip)

    def run(self):
        """Run the full pipeline: train, classify the logs, report the IPs."""
        self.train()
        self.test()
        self.post_test()
44+
45+
46+
if __name__ == '__main__':
    # Executed as a script: build the application and run its pipeline.
    App().run()
49+
50+

Classifier.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from sklearn import svm
2+
from sklearn.tree import DecisionTreeClassifier
3+
4+
class Classififer:
    """Factory that hands out the estimator used by the application."""

    def get_classifier(self):
        """Return a newly constructed ``DecisionTreeClassifier``."""
        return DecisionTreeClassifier()

Classifier.pyc

829 Bytes
Binary file not shown.

Dataset.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import sklearn
2+
import numpy as np
3+
from utils.LogHelper import Logs
4+
from utils.DateUtil import get_microseconds
5+
from utils.Anomaly import Anomaly
6+
from sklearn import preprocessing
7+
8+
9+
class Dataset:
    """Convert raw log rows into a numeric feature table.

    Log rows are read positionally. Field 0 feeds the time-based anomaly
    score; fields 1-7 (client IP, request method, request status, request
    size, time taken to serve, user agent, request header) are each
    label-encoded with their own encoder, so an encoded value can later be
    mapped back (see ``detransform_client_ip``).
    """

    def __init__(self):
        self.logs = Logs().read()

        # One encoder per field, kept on the instance so that codes can be
        # decoded after preprocessing.
        self.client_ip_label_encoder = preprocessing.LabelEncoder()
        self.request_method_label_encoder = preprocessing.LabelEncoder()
        self.request_status_label_encoder = preprocessing.LabelEncoder()
        self.request_size_label_encoder = preprocessing.LabelEncoder()
        self.time_taken_to_serve_label_encoder = preprocessing.LabelEncoder()
        self.user_agent_label_encoder = preprocessing.LabelEncoder()
        self.request_header_label_encoder = preprocessing.LabelEncoder()

        # Per-row feature columns, filled by the preprocess_* methods.
        self.scores = []
        self.client_ips = []
        self.request_methods = []
        self.request_status = []
        self.request_size = []
        self.times_taken_to_serve = []
        self.user_agents = []
        self.request_headers = []

        # Assembled rows, filled by ``preprocess``.
        self.dataset = []

    def _encode_column(self, encoder, index):
        """Fit ``encoder`` on log field ``index`` and return the encoded column."""
        return encoder.fit_transform([row[index] for row in self.logs])

    def preprocess_time(self):
        """Compute one anomaly score per log row into ``self.scores``.

        Rows are counted per ``get_microseconds(row[0])`` value and the counts
        are handed to ``Anomaly().detect``.
        """
        timestamp_clusters = {}
        for row in self.logs:
            timestamp = get_microseconds(row[0])
            timestamp_clusters[timestamp] = timestamp_clusters.get(timestamp, 0) + 1
        anomaly_scores = Anomaly().detect(timestamp_clusters)
        # NOTE(review): the counts are keyed by get_microseconds(row[0]) but
        # the scores are looked up by the raw row[0]; confirm that
        # Anomaly.detect returns a mapping keyed by the raw value.
        # Rebuilt from scratch so repeated calls do not accumulate duplicates.
        self.scores = [anomaly_scores[row[0]] for row in self.logs]

    def preprocess_client_ip(self):
        """Label-encode log field 1 into ``self.client_ips``."""
        self.client_ips = self._encode_column(self.client_ip_label_encoder, 1)

    def preprocess_request_method(self):
        """Label-encode log field 2 into ``self.request_methods``."""
        self.request_methods = self._encode_column(
            self.request_method_label_encoder, 2
        )

    def preprocess_request_status(self):
        """Label-encode log field 3 into ``self.request_status``."""
        self.request_status = self._encode_column(
            self.request_status_label_encoder, 3
        )

    def preprocess_request_size(self):
        """Label-encode log field 4 into ``self.request_size``."""
        self.request_size = self._encode_column(
            self.request_size_label_encoder, 4
        )

    def preprocess_time_taken_to_serve(self):
        """Label-encode log field 5 into ``self.times_taken_to_serve``."""
        self.times_taken_to_serve = self._encode_column(
            self.time_taken_to_serve_label_encoder, 5
        )

    def preprocess_user_agent(self):
        """Label-encode log field 6 into ``self.user_agents``."""
        self.user_agents = self._encode_column(self.user_agent_label_encoder, 6)

    # Original (misspelled) method name, kept so existing callers keep working.
    proprocess_user_agent = preprocess_user_agent

    def preprocess_request_header(self):
        """Label-encode log field 7 into ``self.request_headers``."""
        self.request_headers = self._encode_column(
            self.request_header_label_encoder, 7
        )

    def detransform_client_ip(self, client_ip_list):
        """Map encoded client-IP codes back to the original values."""
        return self.client_ip_label_encoder.inverse_transform(client_ip_list)

    def preprocess(self):
        """Run every preprocessing step and assemble ``self.dataset``.

        Each assembled row is ``[raw field 0, score, client ip, request
        method, request status, request size, time taken to serve, user
        agent, request header]``, with every element after the first being
        the computed or encoded value.
        """
        self.preprocess_time()
        self.preprocess_client_ip()
        self.preprocess_request_method()
        self.preprocess_request_status()
        self.preprocess_request_size()
        self.preprocess_time_taken_to_serve()
        self.preprocess_user_agent()
        self.preprocess_request_header()

        # Rebuilt on every call so a second preprocess()/read_dataset() does
        # not duplicate rows.
        columns = zip(
            self.logs,
            self.scores,
            self.client_ips,
            self.request_methods,
            self.request_status,
            self.request_size,
            self.times_taken_to_serve,
            self.user_agents,
            self.request_headers,
        )
        self.dataset = [
            [log[0], score, ip, method, status, size, served, agent, header]
            for log, score, ip, method, status, size, served, agent, header
            in columns
        ]

    def read_dataset(self):
        """Preprocess the logs and return the assembled dataset rows."""
        self.preprocess()
        return self.dataset
111+
112+
113+
if __name__ == '__main__':
    # Executed as a script: load the logs and run the preprocessing steps.
    Dataset().preprocess()

Dataset.pyc

5.05 KB
Binary file not shown.

config/000-default.conf

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<VirtualHost *:80>
2+
# The ServerName directive sets the request scheme, hostname and port that
3+
# the server uses to identify itself. This is used when creating
4+
# redirection URLs. In the context of virtual hosts, the ServerName
5+
# specifies what hostname must appear in the request's Host: header to
6+
# match this virtual host. For the default virtual host (this file) this
7+
# value is not decisive as it is used as a last resort host regardless.
8+
# However, you must set it for any further virtual host explicitly.
9+
#ServerName www.example.com
10+
11+
ServerAdmin webmaster@localhost
12+
DocumentRoot /var/www/html
13+
14+
# Available loglevels: trace8, ..., trace1, debug, info, notice, warn,
15+
# error, crit, alert, emerg.
16+
# It is also possible to configure the loglevel for particular
17+
# modules, e.g.
18+
#LogLevel info ssl:warn
19+
20+
ErrorLog ${APACHE_LOG_DIR}/error.log
21+
#CustomLog ${APACHE_LOG_DIR}/access.log combined
22+
CustomLog ${APACHE_LOG_DIR}/custom.log dos
23+
24+
# For most configuration files from conf-available/, which are
25+
# enabled or disabled at a global level, it is possible to
26+
# include a line for only one particular virtual host. For example the
27+
# following line enables the CGI configuration for this host only
28+
# after it has been globally disabled with "a2disconf".
29+
#Include conf-available/serve-cgi-bin.conf
30+
</VirtualHost>
31+
32+
# vim: syntax=apache ts=4 sw=4 sts=4 sr noet

0 commit comments

Comments
 (0)