-
-
Notifications
You must be signed in to change notification settings - Fork 349
Optimized CommentedString.validString #1067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
extension Collection where Element: BinaryInteger, Index == Int {
    /// Returns whether `cString` occurs as a contiguous run of elements in this collection.
    ///
    /// A single trailing zero in `cString` is treated as a C-string null terminator and is
    /// excluded from the searched pattern. An empty `cString` always matches.
    ///
    /// Positions are computed relative to each collection's `startIndex`, so slices whose
    /// indices do not begin at zero (for example `ArraySlice`) are searched correctly.
    ///
    /// - Parameter cString: The elements to look for, optionally null-terminated.
    /// - Returns: `true` if the pattern (without its terminator) is found, otherwise `false`.
    @inlinable
    @inline(__always)
    func containsCString<T: BidirectionalCollection>(_ cString: T) -> Bool where T.Element: BinaryInteger, T.Index == Int {
        guard !cString.isEmpty else { return true }

        // Drop null terminator if present
        let patternCount = cString.last == 0 ? cString.count - 1 : cString.count

        // `count` may be O(n) for non-random-access collections, so read it once.
        let haystackCount = count
        guard patternCount <= haystackCount else { return false }

        let haystackStart = startIndex
        let patternStart = cString.startIndex
        let lastCandidate = haystackStart + (haystackCount - patternCount)

        var candidate = haystackStart
        while candidate <= lastCandidate {
            // Advance while the elements agree; a full-length run means a match.
            var offset = 0
            while offset < patternCount, self[candidate + offset] == cString[patternStart + offset] {
                offset += 1
            }
            if offset == patternCount {
                return true
            }
            candidate += 1
        }
        return false
    }
}
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,5 +1,31 @@ | ||||||
| import Foundation | ||||||
|
|
||||||
/// ASCII byte values used when scanning and escaping UTF-8 string contents.
private extension UInt8 {
    // Characters that are escaped, quoted, or checked individually.
    static let tab = UInt8(ascii: "\t")
    static let newline = UInt8(ascii: "\n")
    static let backslash = UInt8(ascii: "\\")
    static let underscore = UInt8(ascii: "_")
    static let doubleQuotes = UInt8(ascii: "\"")
    static let dollar = UInt8(ascii: "$")
    static let slash = UInt8(ascii: "/")

    // Bounds of the `.`...`9` range.
    static let dot = UInt8(ascii: ".")
    static let nine = UInt8(ascii: "9")

    // Bounds of the `A`...`Z` range.
    static let capitalA = UInt8(ascii: "A")
    static let capitalZ = UInt8(ascii: "Z")

    // Lowercase letters: range bounds plus the letters used in `\n` and `\t` escapes.
    static let smallA = UInt8(ascii: "a")
    static let smallN = UInt8(ascii: "n")
    static let smallT = UInt8(ascii: "t")
    static let smallZ = UInt8(ascii: "z")
}
|
|
||||||
/// Pre-built C-string patterns that are searched for in string contents.
private extension ContiguousArray<CChar> {
    /// UTF-8 C-string representation of `"//"`.
    static let slashesUTF8CString: ContiguousArray<CChar> = "//".utf8CString

    /// UTF-8 C-string representation of `"___"`.
    static let threeUnderscoresUTF8CString: ContiguousArray<CChar> = "___".utf8CString
}
|
|
||||||
| /// String that includes a comment | ||||||
| struct CommentedString { | ||||||
| /// Entity string value. | ||||||
|
|
@@ -18,19 +44,6 @@ struct CommentedString { | |||||
| self.comment = comment | ||||||
| } | ||||||
|
|
||||||
| /// Set of characters that are invalid. | ||||||
| private static let invalidCharacters: CharacterSet = { | ||||||
| var invalidSet = CharacterSet(charactersIn: "_$") | ||||||
| invalidSet.insert(charactersIn: UnicodeScalar(".") ... UnicodeScalar("9")) | ||||||
| invalidSet.insert(charactersIn: UnicodeScalar("A") ... UnicodeScalar("Z")) | ||||||
| invalidSet.insert(charactersIn: UnicodeScalar("a") ... UnicodeScalar("z")) | ||||||
| invalidSet.invert() | ||||||
| return invalidSet | ||||||
| }() | ||||||
|
|
||||||
| /// Set of characters that are invalid. | ||||||
| private static let specialCheckCharacters = CharacterSet(charactersIn: "_/") | ||||||
|
|
||||||
| /// Returns a valid string for Xcode projects. | ||||||
| var validString: String { | ||||||
| switch string { | ||||||
|
|
@@ -40,31 +53,38 @@ struct CommentedString { | |||||
| default: break | ||||||
| } | ||||||
|
|
||||||
| if string.rangeOfCharacter(from: CommentedString.invalidCharacters) == nil { | ||||||
| if string.rangeOfCharacter(from: CommentedString.specialCheckCharacters) == nil { | ||||||
| return string | ||||||
| } else if !string.contains("//"), !string.contains("___") { | ||||||
| return string | ||||||
| var str = string | ||||||
| return str.withUTF8 { buf -> String in | ||||||
| let containsInvalidCharacters = buf.containsInvalidCharacters | ||||||
|
|
||||||
| if !containsInvalidCharacters { | ||||||
| let containsSpecialCheckCharacters = buf.containsSpecialCheckCharacters | ||||||
|
|
||||||
| if !containsSpecialCheckCharacters { | ||||||
| return string | ||||||
| } else if !buf.containsCString(ContiguousArray.slashesUTF8CString), | ||||||
| !buf.containsCString(ContiguousArray.threeUnderscoresUTF8CString) { | ||||||
| return string | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| let escaped = string.reduce(into: "") { escaped, character in | ||||||
| // As an optimization, only look at the first scalar. This means we're doing a numeric comparison instead | ||||||
| // of comparing arbitrary-length characters. This is safe because all our cases are a single scalar. | ||||||
| switch character.unicodeScalars.first { | ||||||
| case "\\": | ||||||
| escaped.append("\\\\") | ||||||
| case "\"": | ||||||
| escaped.append("\\\"") | ||||||
| case "\t": | ||||||
| escaped.append("\\t") | ||||||
| case "\n": | ||||||
| escaped.append("\\n") | ||||||
| default: | ||||||
| escaped.append(character) | ||||||
| // calculate exact size | ||||||
| let escapedCapacity = buf.escapedCommentCapacity | ||||||
|
|
||||||
| if #available(iOS 14.0, macOS 11.0, tvOS 14.0, watchOS 7.0, visionOS 1.0, *) { | ||||||
| // write directly into String storage | ||||||
| return String(unsafeUninitializedCapacity: escapedCapacity) { dst in | ||||||
| dst.fillValidString(from: buf) | ||||||
|
|
||||||
| return escapedCapacity | ||||||
| } | ||||||
| } else { | ||||||
| let validStringBuffer = UnsafeMutableBufferPointer<UInt8>.allocate(capacity: escapedCapacity) | ||||||
| validStringBuffer.fillValidString(from: buf) | ||||||
|
|
||||||
| return String(decoding: validStringBuffer, as: UTF8.self) | ||||||
| } | ||||||
| } | ||||||
| return "\"\(escaped)\"" | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -95,3 +115,107 @@ extension CommentedString: ExpressibleByStringLiteral { | |||||
| self.init(value) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| // MARK: - Private | ||||||
|
|
||||||
private extension UnsafeMutableBufferPointer<UInt8> {
    /// Writes the quoted, escaped form of `buf` into this buffer, starting at index 0.
    ///
    /// Output layout: an opening `"`, then every source byte in order, where `\`, `"`,
    /// tab and newline are replaced by the two-byte sequences `\\`, `\"`, `\t` and `\n`,
    /// then a closing `"`.
    ///
    /// No capacity check is performed here; the receiver must already be large enough
    /// to hold the complete output.
    ///
    /// - Parameter buf: The source bytes to escape.
    func fillValidString(from buf: UnsafeBufferPointer<UInt8>) {
        // Next position to write to.
        var cursor = 0

        // Stores a single byte and advances the cursor.
        func put(_ byte: UInt8) {
            self[cursor] = byte
            cursor += 1
        }

        // Stores a backslash followed by `byte`.
        func putEscaped(_ byte: UInt8) {
            put(.backslash)
            put(byte)
        }

        put(.doubleQuotes)

        for byte in buf {
            switch byte {
            case .backslash:
                putEscaped(.backslash)
            case .doubleQuotes:
                putEscaped(.doubleQuotes)
            case .tab:
                putEscaped(.smallT)
            case .newline:
                putEscaped(.smallN)
            default:
                put(byte)
            }
        }

        put(.doubleQuotes)
    }
}
|
|
||||||
private extension UnsafeBufferPointer<UInt8> {
    /// Whether any byte lies outside the valid set.
    ///
    /// Valid bytes are:
    /// 1. `_` and `$`
    /// 2. `.`...`9`
    /// 3. `A`...`Z`
    /// 4. `a`...`z`
    var containsInvalidCharacters: Bool {
        !allSatisfy { byte in
            switch byte {
            case .underscore, .dollar,
                 UInt8.dot ... UInt8.nine,
                 UInt8.capitalA ... UInt8.capitalZ,
                 UInt8.smallA ... UInt8.smallZ:
                return true
            default:
                return false
            }
        }
    }

    /// Whether the buffer contains at least one `_` or `/` byte.
    var containsSpecialCheckCharacters: Bool {
        contains { byte in
            byte == .underscore || byte == .slash
        }
    }

    /// Number of bytes the quoted, escaped form of this buffer occupies.
    ///
    /// Every source byte contributes one byte; each `\`, `"`, tab or newline contributes
    /// one additional byte for its escaping backslash; the surrounding quotes add two.
    /// This walks the whole buffer once to count the bytes that need escaping.
    var escapedCommentCapacity: Int {
        let surroundingQuotes = 2

        var extraEscapeBytes = 0
        for byte in self where byte == .backslash || byte == .doubleQuotes || byte == .tab || byte == .newline {
            extraEscapeBytes += 1
        }

        return count + extraEscapeBytes + surroundingQuotes
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe we can bump the minimum versions in `Package.swift`; we don't need to support platforms older than this.