Patch notes (free-form text placed ahead of the first "diff --git" header).

  USSSALoader.py
    * Removes the whitespace-only last line, so the file now ends directly
      after the getNameList() call (still without a trailing newline).

  genderPredictor.py
    * getFeatures: every feature dict, for male and female names alike, gains
      'male_prob' and 'female_prob', computed by the new _getProbDistr.
    * _getProbDistr (new): male_prob is nameTuple[1] divided by
      nameTuple[1] + nameTuple[2]; a result of exactly 1.0 becomes 0.99 and
      exactly 0.0 becomes 0.01; female_prob is 1.0 - male_prob.
    * _nameFeatures: adds the 'last_three' feature (name[-3:]).
    * Trailing demo code: asks for a name with raw_input instead of
      classifying the hard-coded 'Stephen'.

  Review notes
    * _getProbDistr raises ZeroDivisionError when nameTuple[1] and
      nameTuple[2] are both 0; guard it if such tuples can occur.
    * The "else: pass" branch in _getProbDistr does nothing and can go.
    * The two loops in getFeatures add the same three lines; a shared helper
      would remove the duplication.
    * 'male_prob' / 'female_prob' are set in getFeatures only, not in
      _nameFeatures. classify() is not part of this patch - TODO confirm how
      it builds features for a name typed at the prompt.

  Reconstruction note
    * This patch was rebuilt from a copy whose line breaks had been collapsed.
      Line counts match the hunk headers, but exact whitespace is assumed:
      4-space indentation, and the width of whitespace-only lines is a guess.
      If context lines fail to match, retry with "git apply
      --ignore-whitespace" or "patch -l".

diff --git a/USSSALoader.py b/USSSALoader.py
index 0a5262d..bc6b13b 100644
--- a/USSSALoader.py
+++ b/USSSALoader.py
@@ -81,5 +81,4 @@ def extractNamesDict():
         print '\tImported %s'%filename
     return names
 if __name__ == "__main__":
-    getNameList()
-    
\ No newline at end of file
+    getNameList()
\ No newline at end of file
diff --git a/genderPredictor.py b/genderPredictor.py
index 816fa2c..cc855a5 100644
--- a/genderPredictor.py
+++ b/genderPredictor.py
@@ -14,12 +14,19 @@ def getFeatures(self):
         maleNames,femaleNames=self._loadNames()
 
         featureset = list()
+
         for nameTuple in maleNames:
             features = self._nameFeatures(nameTuple[0])
+            male_prob, female_prob = self._getProbDistr(nameTuple)
+            features['male_prob'] = male_prob
+            features['female_prob'] = female_prob
             featureset.append((features,'M'))
 
         for nameTuple in femaleNames:
             features = self._nameFeatures(nameTuple[0])
+            male_prob, female_prob = self._getProbDistr(nameTuple)
+            features['male_prob'] = male_prob
+            features['female_prob'] = female_prob
             featureset.append((features,'F'))
 
         return featureset
@@ -49,6 +56,17 @@ def train(self,train_set):
 
     def test(self,test_set):
         return classify.accuracy(self.classifier,test_set)
+
+    def _getProbDistr(self,nameTuple):
+        male_prob = (nameTuple[1] * 1.0) / (nameTuple[1] + nameTuple[2])
+        if male_prob == 1.0:
+            male_prob = 0.99
+        elif male_prob == 0.0:
+            male_prob = 0.01
+        else:
+            pass
+        female_prob = 1.0 - male_prob
+        return (male_prob, female_prob)
 
     def getMostInformativeFeatures(self,n=5):
         return self.classifier.most_informative_features(n)
@@ -61,6 +79,7 @@ def _nameFeatures(self,name):
         return {
             'last_letter': name[-1],
             'last_two' : name[-2:],
+            'last_three': name[-3:],
             'last_is_vowel' : (name[-1] in 'AEIOUY')
         }
 
@@ -72,6 +91,6 @@ def _nameFeatures(self,name):
     feats=gp.getMostInformativeFeatures(10)
     for feat in feats:
         print '\t%s = %s'%feat
-    
-    print '\nStephen is classified as %s'%gp.classify('Stephen')
-    
\ No newline at end of file
+    name = raw_input('Enter name to classify: ')
+    print '\n%s is classified as %s'%(name, gp.classify(name))
+