|
|
@@ -25,7 +25,10 @@ def main():
|
|
|
for p in samples])
|
|
|
res = kNearestNeighbors(np.array(data), np.array(labels),
|
|
|
n=args.folds, verbose=args.verbose)
|
|
|
- print("Overall Accuracy: %f" % res)
|
|
|
+ print("Overall Accuracy: %f" % np.average(res))
|
|
|
+ if args.p_value:
|
|
|
+ _, p = t_test(res, labels)
|
|
|
+ print("P-Value: %f" % (p / 2))
|
|
|
|
|
|
def parse_args():
|
|
|
import argparse
|
|
|
@@ -39,12 +42,15 @@ def parse_args():
|
|
|
help='Number of cross-validation folds (default: 5)')
|
|
|
parser.add_argument('-f', '--feature', action='append', type=str,
|
|
|
help='Add feature to list of features to test with.')
|
|
|
+ parser.add_argument('-p', '--p-value', action='store_const', default=False,
|
|
|
+ const=True, help='Calculate a p-value from a t-test.')
|
|
|
return parser.parse_args()
|
|
|
|
|
|
def kNearestNeighbors(data: list, labels: list, n=5, verbose=0):
|
|
|
folds = KFold(n_splits=n)
|
|
|
i = 1
|
|
|
avg = 0
|
|
|
+ accuracies = []
|
|
|
for train_index, test_index in folds.split(data):
|
|
|
if verbose >= 1:
|
|
|
print("Round %d:" % i)
|
|
|
@@ -58,8 +64,13 @@ def kNearestNeighbors(data: list, labels: list, n=5, verbose=0):
|
|
|
accuracy = correct.count(True)/len(correct)
|
|
|
if verbose >= 1:
|
|
|
print(accuracy)
|
|
|
- avg += accuracy
|
|
|
- return avg/n
|
|
|
+ accuracies.append(accuracy)
|
|
|
+ return accuracies
|
|
|
+
|
|
|
+def t_test(accuracy: list, labels: list):
|
|
|
+ from scipy import stats
|
|
|
+ random_avg = 1.0/len(np.unique(labels))
|
|
|
+ return stats.ttest_1samp(accuracy, random_avg)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
main()
|