浏览代码

In command-line form; needs testing

Ethan Goldfarb 6 年之前
父节点
当前提交
03b8d615e7

+ 3 - 2
ethan_data_processing_scripts/Vector.py

@@ -5,11 +5,11 @@ except ImportError:
     import pickle
 import os
 import sys
-sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
-                '/../src/feature-extractor')
 import sample
 import typing
 from typing import List
+sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
+                '/../src/feature-extractor')
 
 
 class FeatureVector:
@@ -31,6 +31,7 @@ class FeatureVector:
     def __repr__(self):
         return str(self.activefeatures)
 
+
 def writePickledData(filename):
     v = FeatureVector()
     v.features = [1, 2, 3, 4]

+ 4 - 1
ethan_data_processing_scripts/nearestneighbors.py

@@ -1,4 +1,5 @@
 #!/usr/bin/python3
+from sklearn.model_selection import KFold
 from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
 from sklearn.ensemble import RandomForestClassifier
 import numpy as np
@@ -32,7 +33,8 @@ def main():
     # kNearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]],
     #                    ["three", 2, 3, "5"], [[1, 1, 0], [0, 5, 5]])
 
-def kNearestNeighbors(data: list, classifications: list):
+
+def kNearestNeighbors(data: list, classifications: list, test_data: list):
     folds = KFold(n_splits=5)
     for train_index, test_index in folds.split(data):
         kn = KNeighborsClassifier(n_neighbors=2)
@@ -49,6 +51,7 @@ def kNearestNeighbors(data: list, classifications: list):
     #     with open("results.txt", "w+") as file:
     #         file.write(writestr)
 
+
 def nearestNeighbors(data: list, test_data: list):
     x = np.array(data)
     nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(x)

+ 5 - 2
ethan_data_processing_scripts/runtests.py

@@ -12,15 +12,18 @@ def main():
     i = 0
     sampleList = Vector.readPickledData(sys.argv[1])
     featureList = []
+    classifications = []
     for s in sampleList:
-        featureList.append(Vector.FeatureVector(s))
+        v = Vector.FeatureVector(s)
+        featureList.append(v)
+        classifications.append(v.classification)
     activeFeatureStrings = []
     for i in range(2, len(sys.argv)):
         activeFeatureStrings.append(sys.argv[i])
     for f in featureList:
         f.set_features(activeFeatureStrings)
     # perform classification on f here
-    nearestneighbors.kNearestNeighbors()
+    nearestneighbors.kNearestNeighbors(featureList[:8 * len(featureList)//10], classifications[:8 * len(classifications)//10], featureList[8 * len(featureList)//10:])
 
 
 if __name__ == '__main__':

+ 1 - 0
src/feature-extractor/sample.py

@@ -4,6 +4,7 @@ import typing
 from typing import List
 from common import window
 
+
 class Sample:
     EPOCH = datetime(1970, 1, 1)
     TIME_FMT = '%Y-%m-%d %H:%M:%S.%f'