Procházet zdrojové kódy

partially updated for command line utility purposes

Ethan Goldfarb před 6 roky
rodič
revize
f9770c332a

+ 26 - 0
ethan_data_processing_scripts/Vector.py

@@ -2,12 +2,15 @@ try:
     import cPickle as pickle
 except ImportError:
     import pickle
+import sample
 
 
 class FeatureVector:
     def __init__(self):
         self.features = []
         self.activefeatures = []
+        # dict mapping feature codes (e.g. "high.total_packets") to their values.
+        self.sampleInfo = {}
         self.classification = None
 
     # set which features are active using a binary list
@@ -25,6 +28,26 @@ class FeatureVector:
         return str(self.features)
 
 
+# use to make a sample into a vector
+def SampleToFeatureVector(s: sample):
+    print(s.__activities)
+    v = FeatureVector()
+    for a in s.__activities.keys():
+        for d in s.__activities[a].keys():
+            # a + d looks like: high.total_packets
+            v.sampleInfo.update({a + "." + d: s.__activities[a][d]})
+        v.features.extend(a["high"].values())
+        v.features.extend(a["mid"].values())
+        v.features.extend(a["low"].values())
+    v.sampleInfo.update(("total_time", s.total_time))
+    v.sampleInfo.update(("average_iat", s.average_iat))
+    v.sampleInfo.update(("dead_time", s.dead_time))
+    v.features.append(s.total_time)
+    v.features.append(s.average_iat)
+    v.features.append(s.dead_time)
+    return v
+
+
 def writePickledData(filename):
     v = FeatureVector()
     v.features = [1, 2, 3, 4]
@@ -45,6 +68,9 @@ def main():
     fv = FeatureVector()
     writePickledData("test.bin")
     readPickledData("test.txt")
+    s = sample.Sample(3)
+
+    fv = SampleToFeatureVector()
 
 
 if __name__ == '__main__':

+ 27 - 0
ethan_data_processing_scripts/runtests.py

@@ -0,0 +1,27 @@
+import sys
+import Vector
+import sample
+
+
+# Test classifications on a pickled file of samples, selecting the active
+# features by sample code, e.g. high.total_packets.
+# Usage: runtests.py pickled_sample_file high.total_packets high.time_spent...
+def main():
+    i = 0
+    sampleList = Vector.readPickledData(sys.argv[1])
+    featureList = []
+    for s in sampleList:
+        featureList.append(Vector.SampleToFeatureVector(s))
+    activeFeatureStrings = []
+    for i in range(2, len(sys.argv)):
+        activeFeatureStrings.append(sys.argv[i])
+    for f in featureList:
+        temp = []
+        for s in activeFeatureStrings:
+            temp.append(f.sampleInfo[s])
+        f.activefeatures = temp
+    # perform classification on f here
+
+
+# Run the command-line entry point only when this file is executed as a script.
+if __name__ == '__main__':
+    main()