ソースを参照

very basic random forest, also moved the MLP to classifiers

Ethan Goldfarb 6 年 前
コミット
9c1dd5bcc8
2 ファイル変更、41 行追加、1 行削除
  1. +1 −1
      src/classifiers/classifier.py
  2. +40 −0
      src/classifiers/nearestneighbors.py

+ 1 - 1
src/classifiers/classifier.py

@@ -28,7 +28,7 @@ def main():
 
 
 # data and results arrays (training and testing) should be paired. Classifications is number of ways to classify data.
-def train(classifications: int, data: list, results: list, testdata: list, testresults: list):
+def multiLayerPerceptronClassifier(classifications: int, data: list, results: list, testdata: list, testresults: list):
     numberOfNeurons = (len(data[0]) + classifications)/2
     model = tf.keras.models.Sequential()
     model.add(tf.keras.layers.Flatten())

+ 40 - 0
src/classifiers/nearestneighbors.py

@@ -2,12 +2,15 @@
 from sklearn.model_selection import KFold
 from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
 from sklearn.ensemble import RandomForestClassifier
+from functools import reduce
+import tensorflow as tf
 import numpy as np
 import sys
 from Vector import FeatureVector
 
 DEFAULT_FEATURES = ["average_iat", "high.avg_burst_size", "high.burst_count"]
 
+
 def main():
     # a test of this method using an arbitrarily generated list of 5 vectors with
     # 3 features each
@@ -31,6 +34,7 @@ def main():
         _, p = t_test(res, labels)
         print("P-Value: %f" % (p / 2))
 
+
 def parse_args():
     import argparse
     parser = argparse.ArgumentParser(
@@ -55,6 +59,7 @@ def parse_args():
                         (default: 1)')
     return parser.parse_args()
 
+
 def kNearestNeighbors(data: list, labels: list,
                       n=5, verbose=0, k=5, weights="uniform", guesses=1):
     folds = KFold(n_splits=n)
@@ -85,16 +90,51 @@ def kNearestNeighbors(data: list, labels: list,
         accuracies.append(accuracy)
     return accuracies
 
+
# data/results and testdata/testresults should be paired lists of samples and
# their labels. `classifications` is the number of distinct classes.
def multiLayerPerceptronClassifier(classifications: int, data: list, results: list, testdata: list, testresults: list):
    """Train a small two-hidden-layer MLP and print its test loss/accuracy.

    Hidden-layer width is the mean of the input feature count and the class
    count (a common sizing rule of thumb). Trains for 5 epochs with SGD and
    sparse categorical cross-entropy, then evaluates on the test split.
    """
    # Integer division: Dense(units=...) requires an int; the original `/`
    # produced a float and would fail at layer construction.
    numberOfNeurons = (len(data[0]) + classifications) // 2
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dense(numberOfNeurons, activation=tf.nn.relu))
    # One output unit per class; softmax yields class probabilities.
    # (Activation passed by keyword for consistency with the layers above.)
    model.add(tf.keras.layers.Dense(classifications, activation=tf.nn.softmax))

    model.compile(optimizer='SGD',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(data, results, epochs=5)

    loss, accuracy = model.evaluate(testdata, testresults)
    print(loss)
    print(accuracy)
+
+
def randomForest(data: list, labels: list, test_data: list, test_data_labels: list):
    """Fit a 10-tree random forest on (data, labels), print the prediction
    for every test sample, then print the overall test accuracy.

    test_data and test_data_labels must be paired.
    """
    # Guard FIRST: the original only checked emptiness after calling
    # predict() and looping, so an empty test set crashed before the guard.
    if len(test_data) == 0:
        return
    rfc = RandomForestClassifier(n_estimators=10)
    rfc.fit(data, labels)
    predictions = rfc.predict(test_data)
    correct = 0
    for sample, predicted, actual in zip(test_data, predictions, test_data_labels):
        print(str(sample) + "prediction: " + str(predicted))
        correct += 1 if predicted == actual else 0
    print("Accuracy: " + str(correct / len(test_data_labels)))
+
+
 def find_in_predictions(probabilities: list, tests: int, labels: list):
     return [list(map(lambda x: x[0],
                      sorted(list(zip(labels, probs)), key=lambda x: x[1]))
     ).index(test)
             for probs, test in zip(probabilities, tests)]
 
+
 def t_test(accuracy: list, labels: list):
     from scipy import stats
     random_avg = 1.0/len(np.unique(labels))
     return stats.ttest_1samp(accuracy, random_avg)
 
+
 # Script entry point: run the demo/experiment in main() when executed directly.
 if __name__ == '__main__':
     main()