From b024c84043c22f7312ccd177436ee21548887af2 Mon Sep 17 00:00:00 2001 From: Bas Broek Date: Sat, 3 Dec 2016 08:13:08 +0100 Subject: [PATCH] Add Flesch-Kincaid tests and syllable counting (#9) * Add support for counting syllables * Add support for Flesch Reading Ease score and Flesch-Kincaid Grade Level * Use an extension to lowercase Character * Add syllable and Flesch examples to Readme * Add changelog * Fix typo * Remove creation information * Add syllable count and Flesch-tests to Example app * Rename README.md to Readme.md * Rename LICENSE to License * Update Readme.md --- Analysis/Classes/Analysis.swift | 56 +++-- Analysis/Classes/Character+Casing.swift | 12 ++ Analysis/Classes/Dictionary+Sorting.swift | 8 - Analysis/Classes/String+Analysis.swift | 8 - Analysis/Classes/SyllableCounter.swift | 203 ++++++++++++++++++ Changelog.md | 9 + Example/Analysis.xcodeproj/project.pbxproj | 4 + Example/Analysis/Analysis.storyboard | 143 +++++++++--- .../AnalysisTableViewController.swift | 40 +++- .../Analysis/UIViewController+Safari.swift | 32 +++ Example/Tests/AnalysisTests.swift | 16 ++ LICENSE => License | 0 README.md => Readme.md | 7 +- 13 files changed, 474 insertions(+), 64 deletions(-) create mode 100644 Analysis/Classes/Character+Casing.swift create mode 100755 Analysis/Classes/SyllableCounter.swift create mode 100644 Changelog.md create mode 100644 Example/Analysis/UIViewController+Safari.swift rename LICENSE => License (100%) rename README.md => Readme.md (88%) diff --git a/Analysis/Classes/Analysis.swift b/Analysis/Classes/Analysis.swift index d8b18b4..935b3e3 100644 --- a/Analysis/Classes/Analysis.swift +++ b/Analysis/Classes/Analysis.swift @@ -1,14 +1,6 @@ -// -// Analysis.swift -// -// -// Created by Bas Broek on 11/11/2016. -// -// - import Foundation -/// The option to when calculating average length. This is either `.word` or `.sentence`. +/// The option to use when calculating average length. This is either `.word` or `.sentence`. public enum LengthOption { case word case sentence @@ -17,6 +9,7 @@ public enum LengthOption { /// An analysis of a `String`. public struct Analysis { public typealias Percentage = Double + public typealias Grade = Double /// The string used to construct the `Analysis`. public let input: String @@ -63,6 +56,13 @@ public struct Analysis { } } + /// Returns the total amount of syllables of the `input`. + public func syllableCount() -> Int { + return words + .map { $0.syllables } + .reduce(0, +) + } + /// Returns the character count of the `input`. /// /// - Parameter includingSpaces: Indicating if characters @@ -87,7 +87,7 @@ public struct Analysis { private func _characterOccurences(caseSensitive: Bool = false) -> [Character: Int] { var occurrences: [Character: Int] = [:] characters - .map { (caseSensitive) ? $0 : Character(String(describing: $0).lowercased()) } + .map { (caseSensitive) ? $0 : $0.lowercased() } .forEach { occurrences[$0] = (occurrences[$0] ?? 0) + 1 } return occurrences } @@ -132,12 +132,22 @@ public struct Analysis { /// should be counted regardless of their case sensitivity. /// Defaults to `false`. public func occurrences(of character: Character, caseSensitive: Bool = false) -> Int { - let character = (caseSensitive) ? character : Character(String(describing: character).lowercased()) + let character = (caseSensitive) ? character : character.lowercased() return characters - .map { (caseSensitive) ? $0 : Character(String(describing: $0).lowercased()) } + .map { (caseSensitive) ? $0 : $0.lowercased() } .filter { $0 == character }.count } + /// Returns the syllables of every unique word. + public func wordSyllables() -> [String: Int] { + var syllables: [String: Int] = [:] + let uniqueWords = Array(_wordOccurrences(caseSensitive: false).keys) + + uniqueWords.forEach { syllables[$0] = $0.syllables } + + return syllables + } + /// Returns the frequency of the specified word. /// /// - Parameter caseSensitive: Indicating if words @@ -184,6 +194,28 @@ public struct Analysis { public var averageWordsPerSentence: Double { return Double(wordCount()) / Double(sentenceCount()) } + + private var _wordsPerSentences: Double { + return Double(wordCount()) / Double(sentenceCount()) + } + + private var _syllablesPerWords: Double { + return Double(syllableCount()) / Double(wordCount()) + } + + /// Returns the Flesch reading ease score. + /// + /// - Note: https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests#Flesch_reading_ease + public func fleschReadingEase() -> Percentage { + return 206.835 - 1.015 * _wordsPerSentences - 84.6 * _syllablesPerWords + } + + /// Returns the Flesch-Kincaid grade level. + /// + /// - Note: https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests#Flesch.E2.80.93Kincaid_grade_level + public func fleschKincaidGradeLevel() -> Grade { + return 0.39 * _wordsPerSentences + 11.8 * _syllablesPerWords - 15.59 + } } extension Analysis: Hashable { diff --git a/Analysis/Classes/Character+Casing.swift b/Analysis/Classes/Character+Casing.swift new file mode 100644 index 0000000..f634bab --- /dev/null +++ b/Analysis/Classes/Character+Casing.swift @@ -0,0 +1,12 @@ +import Foundation + +internal extension Character { + + func lowercased() -> Character { + return Character(String(describing: self).lowercased()) + } + + func uppercased() -> Character { + return Character(String(describing: self).uppercased()) + } +} diff --git a/Analysis/Classes/Dictionary+Sorting.swift b/Analysis/Classes/Dictionary+Sorting.swift index 2e29629..aa6267a 100644 --- a/Analysis/Classes/Dictionary+Sorting.swift +++ b/Analysis/Classes/Dictionary+Sorting.swift @@ -1,11 +1,3 @@ -// -// Dictionary+Sorting.swift -// -// -// Created by Bas Broek on 11/11/2016. -// -// - import Foundation /// The sort option of the dictionary. This is either `.key` or `.value`. diff --git a/Analysis/Classes/String+Analysis.swift b/Analysis/Classes/String+Analysis.swift index b8ad4d5..075b655 100644 --- a/Analysis/Classes/String+Analysis.swift +++ b/Analysis/Classes/String+Analysis.swift @@ -1,11 +1,3 @@ -// -// String+Analysis.swift -// -// -// Created by Bas Broek on 11/11/2016. -// -// - import Foundation public extension String { diff --git a/Analysis/Classes/SyllableCounter.swift b/Analysis/Classes/SyllableCounter.swift new file mode 100755 index 0000000..16d4042 --- /dev/null +++ b/Analysis/Classes/SyllableCounter.swift @@ -0,0 +1,203 @@ +// +// SyllableCounter.swift +// +// Created by Warren Freitag on 2/14/16. +// Copyright © 2016 Warren Freitag. All rights reserved. +// Licensed under the Apache 2.0 License. +// +// Adapted from a Java implementation created by Hugo "m09" Mougard. +// https://github.com/m09/syllable-counter +// + +import Foundation + +public class SyllableCounter { + + // MARK: - Shared instance + + public static let shared = SyllableCounter() + + // MARK: - Private properties + + private var exceptions: [String: Int] = [ + "brutes": 1, + "chummed": 1, + "flapped": 1, + "foamed": 1, + "gaped": 1, + "h'm": 1, + "lb": 1, + "mimes": 1, + "ms": 1, + "peeped": 1, + "sheered": 1, + "st": 1, + "queue": 1, + "none": 1, + "leaves": 1, + "awesome": 2, + "60": 2, + "capered": 2, + "caressed": 2, + "clattered": 2, + "deafened": 2, + "dr": 2, + "effaced": 2, + "effaces": 2, + "fringed": 2, + "greyish": 2, + "jr": 2, + "mangroves": 2, + "messieurs": 2, + "motioned": 2, + "moustaches": 2, + "mr": 2, + "mrs": 2, + "pencilled": 2, + "poleman": 2, + "quivered": 2, + "reclined": 2, + "shivered": 2, + "sidespring": 2, + "slandered": 2, + "sombre": 2, + "sr": 2, + "stammered": 2, + "suavely": 2, + "tottered": 2, + "trespassed": 2, + "truckle": 2, + "unstained": 2, + "therefore": 2, + "businesses": 3, + "bottleful": 3, + "discoloured": 3, + "disinterred": 3, + "hemispheres": 3, + "manoeuvred": 3, + "sepulchre": 3, + "shamefully": 3, + "unexpressed": 3, + "veriest": 3, + "wyoming": 3, + "etc": 4, + "sailmaker": 4, + "satiated": 4, + "sententiously": 4, + "particularized": 5, + "unostentatious": 5, + "propitiatory": 6, + ] + + private var addSyllables: [NSRegularExpression]! + private var subSyllables: [NSRegularExpression]! + + private let vowels: Set = ["a", "e", "i", "o", "u", "y"] + + // MARK: - Error enum + + private enum SyllableCounterError: Error { + case badRegex(String) + case badExceptionsData(String) + } + + // MARK: - Constructors + + public init() { + do { + try populateAddSyllables() + try populateSubSyllables() + } + catch SyllableCounterError.badRegex(let pattern) { + print("Bad Regex pattern: \(pattern)") + } + catch SyllableCounterError.badExceptionsData(let info) { + print("Problem parsing exceptions dataset: \(info)") + } + catch { + print("An unexpected error occured while initializing the syllable counter.") + } + } + + // MARK: - Setup + + private func populateAddSyllables() throws { + try addSyllables = buildRegexes(forPatterns: [ + "ia", "riet", "dien", "iu", "io", "ii", + "[aeiouy]bl$", "mbl$", "tl$", "sl$", "[aeiou]{3}", + "^mc", "ism$", "(.)(?!\\1)([aeiouy])\\2l$", "[^l]llien", "^coad.", + "^coag.", "^coal.", "^coax.", "(.)(?!\\1)[gq]ua(.)(?!\\2)[aeiou]", "dnt$", + "thm$", "ier$", "iest$", "[^aeiou][aeiouy]ing$"]) + } + + private func populateSubSyllables() throws { + try subSyllables = buildRegexes(forPatterns: [ + "cial", "cian", "tia", "cius", "cious", + "gui", "ion", "iou", "sia$", ".ely$", + "ves$", "geous$", "gious$", "[^aeiou]eful$", ".red$"]) + } + + private func buildRegexes(forPatterns patterns: [String]) throws -> [NSRegularExpression] { + return try patterns.map { pattern -> NSRegularExpression in + do { + let regex = try NSRegularExpression(pattern: pattern, options: [.caseInsensitive, .anchorsMatchLines]) + return regex + } + catch { + throw SyllableCounterError.badRegex(pattern) + } + } + } + + // MARK: - Public methods + + internal func count(word: String) -> Int { + if word.characters.count <= 1 { + return word.characters.count + } + + var mutatedWord = word.lowercased(with: Locale(identifier: "en_US")).trimmingCharacters(in: .punctuationCharacters) + + if let exceptionValue = exceptions[mutatedWord] { + return exceptionValue + } + + if mutatedWord.characters.last == "e" { + mutatedWord = String(mutatedWord.characters.dropLast()) + } + + var count = 0 + var previousIsVowel = false + + for character in mutatedWord.characters { + let isVowel = vowels.contains(character) + if isVowel && !previousIsVowel { + count += 1 + } + previousIsVowel = isVowel + } + + for pattern in addSyllables { + let matches = pattern.matches(in: mutatedWord, options: NSRegularExpression.MatchingOptions(rawValue: 0), range: NSRange(location: 0, length: mutatedWord.characters.count)) + if !matches.isEmpty { + count += 1 + } + } + + for pattern in subSyllables { + let matches = pattern.matches(in: mutatedWord, options: NSRegularExpression.MatchingOptions(rawValue: 0), range: NSRange(location: 0, length: mutatedWord.characters.count)) + if !matches.isEmpty { + count -= 1 + } + } + + return (count > 0) ? count : 1 + } +} + +extension String { + + internal var syllables: Int { + return SyllableCounter.shared.count(word: self) + } +} diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 0000000..f792f45 --- /dev/null +++ b/Changelog.md @@ -0,0 +1,9 @@ +# next + +- Added `syllableCount()`, which counts the total amount of syllables of the `input`. +- Added `wordSyllables()`, which returns the syllables of every unique word. +- Added `fleschReadingEase()`, which calculates the [Flesch reading ease score](https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests#Flesch_reading_ease). +- Added `fleschKincaidGradeLevel()`, which calculates the [Flesch-Kincaid grade level](https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests#Flesch.E2.80.93Kincaid_grade_level). + +# [0.1.0](https://github.com/BasThomas/Analysis/releases/tag/0.1.0) +Initial release. diff --git a/Example/Analysis.xcodeproj/project.pbxproj b/Example/Analysis.xcodeproj/project.pbxproj index 594d646..a7d1bfa 100644 --- a/Example/Analysis.xcodeproj/project.pbxproj +++ b/Example/Analysis.xcodeproj/project.pbxproj @@ -9,6 +9,7 @@ /* Begin PBXBuildFile section */ 4978479C1DD87E4A003CFFBB /* Analysis.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4978479B1DD87E4A003CFFBB /* Analysis.storyboard */; }; 4978479E1DD88148003CFFBB /* AnalysisTableViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4978479D1DD88148003CFFBB /* AnalysisTableViewController.swift */; }; + 4985BEC11DF1959500B36F51 /* UIViewController+Safari.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4985BEC01DF1959500B36F51 /* UIViewController+Safari.swift */; }; 607FACD61AFB9204008FA782 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 607FACD51AFB9204008FA782 /* AppDelegate.swift */; }; 607FACD81AFB9204008FA782 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 607FACD71AFB9204008FA782 /* ViewController.swift */; }; 607FACDB1AFB9204008FA782 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 607FACD91AFB9204008FA782 /* Main.storyboard */; }; @@ -36,6 +37,7 @@ 45E49C4F9720F93380BF3A10 /* Pods-Analysis_Tests.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Analysis_Tests.debug.xcconfig"; path = "Pods/Target Support Files/Pods-Analysis_Tests/Pods-Analysis_Tests.debug.xcconfig"; sourceTree = ""; }; 4978479B1DD87E4A003CFFBB /* Analysis.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; path = Analysis.storyboard; sourceTree = ""; }; 4978479D1DD88148003CFFBB /* AnalysisTableViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnalysisTableViewController.swift; sourceTree = ""; }; + 4985BEC01DF1959500B36F51 /* UIViewController+Safari.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "UIViewController+Safari.swift"; sourceTree = ""; }; 607FACD01AFB9204008FA782 /* Analysis_Example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Analysis_Example.app; sourceTree = BUILT_PRODUCTS_DIR; }; 607FACD41AFB9204008FA782 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 607FACD51AFB9204008FA782 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; @@ -111,6 +113,7 @@ 607FACD51AFB9204008FA782 /* AppDelegate.swift */, 607FACD71AFB9204008FA782 /* ViewController.swift */, 4978479D1DD88148003CFFBB /* AnalysisTableViewController.swift */, + 4985BEC01DF1959500B36F51 /* UIViewController+Safari.swift */, 607FACD91AFB9204008FA782 /* Main.storyboard */, 4978479B1DD87E4A003CFFBB /* Analysis.storyboard */, 607FACDC1AFB9204008FA782 /* Images.xcassets */, @@ -370,6 +373,7 @@ files = ( 607FACD81AFB9204008FA782 /* ViewController.swift in Sources */, 607FACD61AFB9204008FA782 /* AppDelegate.swift in Sources */, + 4985BEC11DF1959500B36F51 /* UIViewController+Safari.swift in Sources */, 4978479E1DD88148003CFFBB /* AnalysisTableViewController.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/Example/Analysis/Analysis.storyboard b/Example/Analysis/Analysis.storyboard index 42ebdb2..a6654f0 100644 --- a/Example/Analysis/Analysis.storyboard +++ b/Example/Analysis/Analysis.storyboard @@ -21,21 +21,21 @@ - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + +