Quellcode durchsuchen

made knearestneighbors a command line utility

Ethan Goldfarb vor 6 Jahren
Ursprung
Commit
dd86396b7b

+ 8 - 6
ethan_data_processing_scripts/Vector.py

@@ -22,26 +22,28 @@ class FeatureVector:
         self.activefeatures = activefeatures
 
     def __repr__(self) -> str:
         """Return the feature list rendered as a string.

         The value is returned rather than printed: ``repr()`` raises
         ``TypeError`` when ``__repr__`` returns ``None``.
         """
         return str(self.features)
 
 
 def writePickledData(filename, vectors=None):
     """Pickle ``vectors`` to ``filename``, replacing any existing file.

     Args:
         filename: Path of the file to create or overwrite.
         vectors: Object to serialize. When omitted, a two-element sample
             list is written: the same ``FeatureVector`` twice, with
             ``features`` and ``activefeatures`` both set to ``[1, 2, 3, 4]``.
     """
     if vectors is None:
         sample = FeatureVector()
         sample.features = [1, 2, 3, 4]
         sample.activefeatures = [1, 2, 3, 4]
         # Both entries are deliberately the same object, as in the fixture
         # this default replaces.
         vectors = [sample, sample]
     with open(filename, 'wb') as file:
         pickle.dump(vectors, file)
 
 
 def readPickledData(filename):
     """Load and return the object pickled in ``filename``.

     NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
     file, so only pass paths whose contents come from a trusted source.

     Args:
         filename: Path of a file containing one pickled object.

     Returns:
         The unpickled object.
     """
     with open(filename, 'rb') as file:
         return pickle.load(file)
 
 
 def main():
     """Write the sample data to a pickle file and read it back."""
     # Use one path for both steps: reading "test.txt" after writing
     # "test.bin" would load an unrelated file or raise FileNotFoundError.
     path = "test.bin"
     writePickledData(path)
     readPickledData(path)
 
 

+ 21 - 1
ethan_data_processing_scripts/nearestneighbors.py

@@ -1,16 +1,36 @@
 from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
 import numpy as np
+import sys
+from Vector import *
 
 
 def main():
     """Classify pickled test vectors with k-nearest neighbors.

     Expects exactly three command-line arguments: the training-data file,
     the classifications file and the test-data file. Each is loaded with
     ``readPickledData``; the two data files must hold iterables of objects
     that expose a ``features`` attribute.
     """
     if len(sys.argv) != 4:
         # sys.exit(message) writes the message to stderr and exits with
         # status 1; the bare exit() helper belongs to the site module and
         # reported success here.
         sys.exit("Usage: nearestneighbors.py datafile classificationsfile testdatafile")
     data = readPickledData(sys.argv[1])
     classifications = readPickledData(sys.argv[2])
     testdata = readPickledData(sys.argv[3])
     # The classifier is given the plain feature lists, not the vector objects.
     newdata = [d.features for d in data]
     newtest = [d.features for d in testdata]
     print(newdata)
     print(classifications)
     print(newtest)
     kNearestNeighbors(newdata, classifications, newtest)
 
 
 def kNearestNeighbors(data: list, classifications: list, test_data: list, n_neighbors: int = 2):
     """Fit a k-nearest-neighbors classifier and predict labels for ``test_data``.

     Args:
         data: Training feature vectors.
         classifications: Labels for ``data``, matched by index.
         test_data: Feature vectors to classify.
         n_neighbors: Number of neighbors consulted for each prediction.

     Returns:
         The result of ``KNeighborsClassifier.predict(test_data)``; it is also
         printed, after a header line and an echo of ``test_data``.
     """
     kn = KNeighborsClassifier(n_neighbors=n_neighbors)
     kn.fit(data, classifications)
     predictions = kn.predict(test_data)
     print("Predictions, matching test_data by index: ")
     print(test_data)
     print(predictions)
     return predictions