@@ -2,12 +2,15 @@
from sklearn.model_selection import KFold
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
+from functools import reduce
+import tensorflow as tf
import numpy as np
import sys
from Vector import FeatureVector

DEFAULT_FEATURES = ["average_iat", "high.avg_burst_size", "high.burst_count"]

+
def main():
    # a test of this method using an arbitrarily generated list of 5 vectors with
    # 3 features each
@@ -31,6 +34,7 @@ def main():
    _, p = t_test(res, labels)
    print("P-Value: %f" % (p / 2))

+
def parse_args():
    import argparse
    parser = argparse.ArgumentParser(
@@ -55,6 +59,7 @@ def parse_args():
                        (default: 1)')
    return parser.parse_args()

+
def kNearestNeighbors(data: list, labels: list,
                      n=5, verbose=0, k=5, weights="uniform", guesses=1):
    folds = KFold(n_splits=n)
@@ -85,16 +90,51 @@ def kNearestNeighbors(data: list, labels: list,
        accuracies.append(accuracy)
    return accuracies

+
+def multiLayerPerceptronClassifier(classifications: int, data: list, results: list, testdata: list, testresults: list):
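+    # Hidden-layer width: the midpoint between the input feature count and the
+    # number of output classes (a simple heuristic, not a tuned value).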
+    numberOfNeurons = (len(data[0]) + classifications) // 2
+    model = tf.keras.models.Sequential()
+    model.add(tf.keras.layers.Flatten())
+    model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
+    model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
+    model.add(tf.keras.layers.Dense(classifications, activation=tf.nn.softmax))
+
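+    # sparse_categorical_crossentropy expects integer class labels in `results`
+    # (not one-hot vectors); 'SGD' selects plain stochastic gradient descent.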
+    model.compile(optimizer='SGD',
+                  loss='sparse_categorical_crossentropy',
+                  metrics=['accuracy'])
+    model.fit(data, results, epochs=5)
+
+    loss, accuracy = model.evaluate(testdata, testresults)
+    print(loss)
+    print(accuracy)
+
+
+def randomForest(data: list, labels: list, test_data: list, test_data_labels: list):
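+    # Fit a 10-tree random forest, print each test sample's prediction, then
+    # report the fraction of test samples classified correctly.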
+    rfc = RandomForestClassifier(n_estimators=10)
+    rfc.fit(data, labels)
+    if len(test_data) == 0:
+        return
+    predictions = rfc.predict(test_data)
+    for t in range(len(test_data)):
+        print(str(test_data[t]) + " prediction: " + str(predictions[t]))
+    accuracysum = 0
+    for t in range(len(test_data)):
+        accuracysum += 1 if predictions[t] == test_data_labels[t] else 0
+    print("Accuracy: " + str(accuracysum/len(test_data_labels)))
+
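+# Hypothetical usage from main(); train_x/train_y/test_x/test_y stand for an
+# existing train/test split and are not defined in this module:
+#     randomForest(train_x, train_y, test_x, test_y)
+#     multiLayerPerceptronClassifier(len(np.unique(train_y)), train_x, train_y, test_x, test_y)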
+
def find_in_predictions(probabilities: list, tests: int, labels: list):
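+    # For each sample, sort the candidate labels by predicted probability
+    # (ascending) and return the index of that sample's entry from `tests`
+    # in the sorted ordering.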
    return [list(map(lambda x: x[0],
                     sorted(list(zip(labels, probs)), key=lambda x: x[1]))
                 ).index(test)
            for probs, test in zip(probabilities, tests)]

+
def t_test(accuracy: list, labels: list):
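+    # One-sample t-test of the observed accuracies against chance level
+    # (1 / number of distinct labels).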
    from scipy import stats
    random_avg = 1.0/len(np.unique(labels))
    return stats.ttest_1samp(accuracy, random_avg)

+
if __name__ == '__main__':
    main()