|
|
@@ -1,10 +1,14 @@
|
|
|
#!/usr/bin/python3
|
|
|
from sklearn.model_selection import KFold
|
|
|
-from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
|
|
|
-from sklearn.ensemble import RandomForestClassifier
|
|
|
import numpy as np
|
|
|
-import sys
|
|
|
-from Vector import FeatureVector
|
|
|
+try:
|
|
|
+ import sample
|
|
|
+except ImportError:
|
|
|
+ import os
|
|
|
+ import sys
|
|
|
+ sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
|
|
|
+ '/../feature-extractor')
|
|
|
+ import sample
|
|
|
|
|
|
DEFAULT_FEATURES = ["average_iat", "high.avg_burst_size", "high.burst_count"]
|
|
|
|
|
|
@@ -21,6 +25,7 @@ def main():
|
|
|
from random import shuffle
|
|
|
shuffle(samples)
|
|
|
features = args.feature if args.feature else DEFAULT_FEATURES
|
|
|
+ from Vector import FeatureVector
|
|
|
data, labels = zip(*[(FeatureVector(p, features).get(), p.user)
|
|
|
for p in samples])
|
|
|
res = kNearestNeighbors(np.array(data), np.array(labels),
|
|
|
@@ -31,6 +36,7 @@ def main():
|
|
|
_, p = t_test(res, labels)
|
|
|
print("P-Value: %f" % (p / 2))
|
|
|
|
|
|
+
|
|
|
def parse_args():
|
|
|
import argparse
|
|
|
parser = argparse.ArgumentParser(
|
|
|
@@ -55,8 +61,10 @@ def parse_args():
|
|
|
(default: 1)')
|
|
|
return parser.parse_args()
|
|
|
|
|
|
+
|
|
|
def kNearestNeighbors(data: list, labels: list,
|
|
|
n=5, verbose=0, k=5, weights="uniform", guesses=1):
|
|
|
+ from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
|
|
|
folds = KFold(n_splits=n)
|
|
|
i = 1
|
|
|
avg = 0
|
|
|
@@ -85,16 +93,58 @@ def kNearestNeighbors(data: list, labels: list,
|
|
|
accuracies.append(accuracy)
|
|
|
return accuracies
|
|
|
|
|
|
+
|
|
|
+# TODO: This should be in a separate file.
|
|
|
+# If we need a unified interface we can make an aggregator.
|
|
|
+# TODO: KFold validation
|
|
|
+def multiLayerPerceptronClassifier(classifications: int, data: list, results: list, testdata: list, testresults: list):
|
|
|
+ import tensorflow as tf
|
|
|
+ numberOfNeurons = (len(data[0]) + classifications)/2
|
|
|
+ model = tf.keras.models.Sequential()
|
|
|
+ model.add(tf.keras.layers.Flatten())
|
|
|
+ model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
|
|
|
+ model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
|
|
|
+ model.add(tf.keras.layers.Dense(classifications, tf.nn.softmax))
|
|
|
+
|
|
|
+ model.compile(optimizer='SGD',
|
|
|
+ loss='sparse_categorical_crossentropy',
|
|
|
+ metrics=['accuracy'])
|
|
|
+ model.fit(data, results, epochs=5)
|
|
|
+
|
|
|
+ loss, accuracy = model.evaluate(testdata, testresults)
|
|
|
+ print(loss)
|
|
|
+ print(accuracy)
|
|
|
+
|
|
|
+# TODO: This should be in a separate file.
|
|
|
+# If we need a unified interface we can make an aggregator.
|
|
|
+# TODO: KFold validation
|
|
|
+def randomForest(data: list, labels: list, test_data: list, test_data_labels: list):
|
|
|
+ from sklearn.ensemble import RandomForestClassifier
|
|
|
+ rfc = RandomForestClassifier(n_estimators=10)
|
|
|
+ rfc.fit(data, labels)
|
|
|
+ predictions = rfc.predict(test_data)
|
|
|
+ for t in range(len(test_data)):
|
|
|
+ print(str(test_data[t]) + "prediction: " + str(predictions[t]))
|
|
|
+ if len(test_data) == 0:
|
|
|
+ return
|
|
|
+ accuracysum = 0
|
|
|
+ for t in range(len(test_data)):
|
|
|
+ accuracysum += 1 if predictions[t] == test_data_labels[t] else 0
|
|
|
+ print("Accuracy: " + str(accuracysum/len(test_data_labels)))
|
|
|
+
|
|
|
+
|
|
|
def find_in_predictions(probabilities: list, tests: int, labels: list):
|
|
|
return [list(map(lambda x: x[0],
|
|
|
sorted(list(zip(labels, probs)), key=lambda x: x[1]))
|
|
|
).index(test)
|
|
|
for probs, test in zip(probabilities, tests)]
|
|
|
|
|
|
+
|
|
|
def t_test(accuracy: list, labels: list):
|
|
|
from scipy import stats
|
|
|
random_avg = 1.0/len(np.unique(labels))
|
|
|
return stats.ttest_1samp(accuracy, random_avg)
|
|
|
|
|
|
+
|
|
|
if __name__ == '__main__':
|
|
|
main()
|