6 years ago · d03562d942
--- a/ethan_data_processing_scripts/nearestneighbors.py
+++ b/ethan_data_processing_scripts/nearestneighbors.py
@@ -1,3 +1,4 @@
 
				+#!/usr/bin/python3
			
 
				 from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
			
 
				 from sklearn.ensemble import RandomForestClassifier
			
 
				 import numpy as np
			
@@ -6,11 +7,13 @@ from Vector import *
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # a test of this method using an arbitrarily generated list of 5 vectors with 3 features each
			
 
				+    # a test of this method using an arbitrarily generated list of 5 vectors with
			
 
				+    # 3 features each
			
 
				     # nearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]], [[1, 1, 4]])
			
 
				     print(len(sys.argv))
			
 
				     if len(sys.argv) != 5:
			
 
				-        print("Usage: nearestneighbors.py datafile.bin classificationsfile.bin testdatafile.bin -(p/e)")
			
 
				+        print("Usage: nearestneighbors.py datafile.bin classificationsfile.bin " \
			
 
				+              "testdatafile.bin -(p/e)")
			
 
				         exit()
			
 
				     data = readPickledData(sys.argv[1])
			
 
				     classifcations = readPickledData(sys.argv[2])
			
@@ -26,23 +29,25 @@ def main():
 
				     kNearestNeighbors(newdata, classifcations, newtest)
			
 
				     # print("Random Forest:")
			
 
				     # randomForest(newdata, classifcations, newtest)
			
 
				-    # kNearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]], ["three", 2, 3, "5"], [[1, 1, 0], [0, 5, 5]])
			
 
				-
			
 
				-
			
 
				-def kNearestNeighbors(data: list, classifications: list, test_data: list):
			
 
				-    kn = KNeighborsClassifier(n_neighbors=2)
			
 
				-    kn.fit(data, classifications)
			
 
				-    p = kn.predict(test_data)
			
 
				-    print("Predictions, matching test_data by index: ")
			
 
				-    print(test_data)
			
 
				-    print(p)
			
 
				-    writestr = "Predictions, matching test_data by index:\n" + str(test_data) + "\n" + str(p)
			
 
				-    if sys.argv[4][1] == 'p':
			
 
				-        pickle.dump((test_data, p), open("results.bin", "wb"))
			
 
				-    else:
			
 
				-        with open("results.txt", "w+") as file:
			
 
				-            file.write(writestr)
			
 
				+    # kNearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]],
			
 
				+    #                    ["three", 2, 3, "5"], [[1, 1, 0], [0, 5, 5]])
			
 
				 
			
 
				+def kNearestNeighbors(data: list, classifications: list):
				+    """Cross-validate a 2-nearest-neighbours classifier over 5 folds.
				+
				+    For every fold the classifier is fitted on the training rows of
				+    ``data`` and asked to predict the held-out rows; the held-out rows and
				+    their predictions are printed.
				+
				+    :param data: feature vectors, one row per sample
				+    :param classifications: labels, parallel to ``data``
				+    :return: list of ``(test_index, predictions)`` pairs, one per fold
				+
				+    NOTE(review): the call sites visible in this change still pass three
				+    arguments (main) or none (runtests.py) and must be updated to match
				+    this two-argument signature.
				+    """
				+    # Imported locally: none of the module-level imports visible in this
				+    # change provide KFold.
				+    from sklearn.model_selection import KFold
				+
				+    # Plain lists cannot be indexed with the index arrays KFold yields.
				+    samples = np.asarray(data)
				+    labels = np.asarray(classifications)
				+
				+    results = []
				+    for train_index, test_index in KFold(n_splits=5).split(samples):
				+        kn = KNeighborsClassifier(n_neighbors=2)
				+        kn.fit(samples[train_index], labels[train_index])
				+        # Predict the held-out fold; the previous code read the undefined
				+        # name test_data here.
				+        held_out = samples[test_index]
				+        p = kn.predict(held_out)
				+        print("Predictions, matching test_data by index: ")
				+        print(held_out)
				+        print(p)
				+        results.append((test_index, p))
				+    # TODO: writing the results to results.bin / results.txt is disabled.
				+    return results
			
 
				 
			
 
				 def nearestNeighbors(data: list, test_data: list):
			
 
				     x = np.array(data)
			
--- a/ethan_data_processing_scripts/runtests.py
+++ b/ethan_data_processing_scripts/runtests.py
@@ -1,3 +1,4 @@
 
				+#!/usr/bin/python3
			
 
				 import sys
			
 
				 import Vector
			
 
				 import sample
			
@@ -12,15 +13,12 @@ def main():
 
				     sampleList = Vector.readPickledData(sys.argv[1])
			
 
				     featureList = []
			
 
				     for s in sampleList:
			
 
				-        featureList.append(Vector.SampleToFeatureVector(s))
			
 
				+        featureList.append(Vector.FeatureVector(s))
			
 
				     activeFeatureStrings = []
			
 
				     for i in range(2, len(sys.argv)):
			
 
				         activeFeatureStrings.append(sys.argv[i])
			
 
				     for f in featureList:
			
 
				-        temp = []
			
 
				-        for s in activeFeatureStrings:
			
 
				-            temp.append(f.sampleInfo[s])
			
 
				-        f.activefeatures = temp
			
 
				+        f.set_features(activeFeatureStrings)
			
 
				     # perform classification on f here
			
 
				     nearestneighbors.kNearestNeighbors()