-
Notifications
You must be signed in to change notification settings - Fork 0
/
randomForest.py
37 lines (32 loc) · 1.54 KB
/
randomForest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy
import pandas
import random
from decisionTree import buildDecisionTree, decisionTreePredictions
def trainTestSplit(dataFrame, testSize):
    """Randomly split a DataFrame into a training part and a test part.

    Rows are chosen by position rather than by index label, so the split
    is exact even when ``dataFrame`` has a non-unique index: the test part
    always holds exactly ``testSize`` rows and every row ends up in exactly
    one of the two parts.

    Args:
        dataFrame: The pandas DataFrame to split.
        testSize: Number of rows for the test part. A ``float`` is treated
            as a fraction of ``len(dataFrame)`` and rounded to a row count;
            any other value is passed to ``random.sample`` as the count.

    Returns:
        A ``(dataFrameTrain, dataFrameTest)`` tuple. The test rows appear
        in the order they were sampled; the training rows keep their
        original relative order.

    Raises:
        ValueError: Propagated from ``random.sample`` when the requested
            count is negative or larger than the number of rows.
    """
    rowCount = len(dataFrame)
    if isinstance(testSize, float):
        testSize = round(testSize * rowCount)

    # Sample positions, not labels: label-based .loc/.drop select or remove
    # every row sharing a sampled label when the index contains duplicates.
    testPositions = random.sample(population=range(rowCount), k=testSize)
    heldOut = set(testPositions)
    trainPositions = [
        position for position in range(rowCount) if position not in heldOut
    ]

    dataFrameTest = dataFrame.iloc[testPositions]
    dataFrameTrain = dataFrame.iloc[trainPositions]
    return dataFrameTrain, dataFrameTest
def bootstrapSample(dataFrame, bootstrapSize):
    """Draw a bootstrap sample (rows picked with replacement).

    Args:
        dataFrame: The pandas DataFrame to sample rows from.
        bootstrapSize: How many row positions to draw; passed as the
            ``size`` argument of ``numpy.random.randint``.

    Returns:
        The rows of ``dataFrame`` at the drawn positions, in draw order.
        The same row may appear more than once.
    """
    rowCount = len(dataFrame)
    # Positions are drawn uniformly from [0, rowCount); repeats are allowed.
    drawnPositions = numpy.random.randint(0, rowCount, bootstrapSize)
    sampledRows = dataFrame.iloc[drawnPositions]
    return sampledRows
def createRandomForest(dataFrame, bootstrapSize, randomAttributes, randomSplits, forestSize = 20, treeMaxDepth = 1000):
    """Build a list of decision trees, each trained on its own bootstrap sample.

    Args:
        dataFrame: Training data handed to ``bootstrapSample``.
        bootstrapSize: Number of rows drawn for each tree's sample.
        randomAttributes: Forwarded unchanged to ``buildDecisionTree``.
        randomSplits: Forwarded unchanged to ``buildDecisionTree``.
        forestSize: Number of trees to build.
        treeMaxDepth: Forwarded to ``buildDecisionTree`` as ``maxDepth``.

    Returns:
        A list with ``forestSize`` entries, each being whatever
        ``buildDecisionTree`` returned for one bootstrap sample.
    """
    # Each iteration draws a fresh sample first and then fits one tree on it.
    return [
        buildDecisionTree(
            bootstrapSample(dataFrame, bootstrapSize),
            maxDepth = treeMaxDepth,
            randomAttributes = randomAttributes,
            randomSplits = randomSplits,
        )
        for _ in range(forestSize)
    ]
def randomForestPredictions(dataFrame, randomForest):
    """Predict by combining the per-tree predictions with a row-wise mode.

    Args:
        dataFrame: Data passed to ``decisionTreePredictions`` for every tree.
        randomForest: Sequence of trees, e.g. as built by
            ``createRandomForest``.

    Returns:
        Column ``0`` of the row-wise mode over all trees' predictions,
        i.e. the first most-frequent value in each row.
    """
    # One column per tree, named after the tree's position in the forest.
    perTreePredictions = {
        "decision tree " + str(treeNumber): decisionTreePredictions(dataFrame, tree)
        for treeNumber, tree in enumerate(randomForest)
    }
    votes = pandas.DataFrame(perTreePredictions)
    rowWiseModes = votes.mode(axis = 1)
    return rowWiseModes[0]
def calculateAccuracy(predictedResults, category):
    """Return the fraction of predictions that equal the true category.

    Args:
        predictedResults: Predicted values; compared element-wise with
            ``category`` via ``==``, and the comparison result must offer
            ``.mean()`` (as a pandas Series or numpy array does).
        category: The true values to compare against.

    Returns:
        The mean of the element-wise equality result.
    """
    return (predictedResults == category).mean()