Skip to content

Commit 8d6539b

Browse files
committed
Remove Entropy
Preparing for Random -> Entropy renaming
1 parent 5bfa3a9 commit 8d6539b

File tree

8 files changed

+200
-316
lines changed

8 files changed

+200
-316
lines changed

EntropyString.xcodeproj/project.pbxproj

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99
/* Begin PBXBuildFile section */
1010
E205C6AE1F1BFF66007C139E /* EntropyString.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = E205C6A51F1BFF66007C139E /* EntropyString.framework */; };
1111
E205C6BC1F1BFFEC007C139E /* Random.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */; };
12-
E205C6BD1F1BFFEC007C139E /* Entropy.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */; };
1312
E205C6EB1F1C02E4007C139E /* EntropyString.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = E205C6E21F1C02E3007C139E /* EntropyString.framework */; };
1413
E205C6F91F1C030E007C139E /* Random.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */; };
15-
E205C6FA1F1C030E007C139E /* Entropy.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */; };
1614
E21779C81F3E7E260019CC42 /* Bytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = E21779C71F3E7E260019CC42 /* Bytes.swift */; };
1715
E21779C91F3E7E260019CC42 /* Bytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = E21779C71F3E7E260019CC42 /* Bytes.swift */; };
1816
E21779CA1F3E7E260019CC42 /* Bytes.swift in Sources */ = {isa = PBXBuildFile; fileRef = E21779C71F3E7E260019CC42 /* Bytes.swift */; };
@@ -21,16 +19,11 @@
2119
E287ADA31F0E9B9A00DE6DF5 /* EntropyString.h in Headers */ = {isa = PBXBuildFile; fileRef = E287AD951F0E9B9A00DE6DF5 /* EntropyString.h */; settings = {ATTRIBUTES = (Public, ); }; };
2220
E287ADAE1F0E9DDC00DE6DF5 /* Random.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */; };
2321
E287ADB51F0E9DEE00DE6DF5 /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = E287ADB21F0E9DEE00DE6DF5 /* Info.plist */; };
24-
E287ADB91F0EDCAB00DE6DF5 /* Entropy.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */; };
2522
E2B8C8171F3E2B8500171502 /* CharSet.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2B8C8161F3E2B8500171502 /* CharSet.swift */; };
26-
E2D2DD461F436A5F006846F7 /* Entropy.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */; };
2723
E2D2DD471F436AB3006846F7 /* Random.swift in Sources */ = {isa = PBXBuildFile; fileRef = E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */; };
2824
E2D2DD481F436AB6006846F7 /* CharSet.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2B8C8161F3E2B8500171502 /* CharSet.swift */; };
2925
E2D2DD491F436AB7006846F7 /* CharSet.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2B8C8161F3E2B8500171502 /* CharSet.swift */; };
3026
E2D2DD4A1F436AB8006846F7 /* CharSet.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2B8C8161F3E2B8500171502 /* CharSet.swift */; };
31-
E2E1404F1F422C70003E02B0 /* EntropyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E1404E1F422C70003E02B0 /* EntropyTests.swift */; };
32-
E2E140501F422C70003E02B0 /* EntropyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E1404E1F422C70003E02B0 /* EntropyTests.swift */; };
33-
E2E140511F422C70003E02B0 /* EntropyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E1404E1F422C70003E02B0 /* EntropyTests.swift */; };
3427
E2E140531F422FB1003E02B0 /* RandomTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E140521F422FB1003E02B0 /* RandomTests.swift */; };
3528
E2E140541F422FB1003E02B0 /* RandomTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E140521F422FB1003E02B0 /* RandomTests.swift */; };
3629
E2E140551F422FB1003E02B0 /* RandomTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E2E140521F422FB1003E02B0 /* RandomTests.swift */; };
@@ -79,11 +72,9 @@
7972
E287AD9B1F0E9B9A00DE6DF5 /* EntropyString-iOSTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "EntropyString-iOSTests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
8073
E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Random.swift; sourceTree = "<group>"; };
8174
E287ADB21F0E9DEE00DE6DF5 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
82-
E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Entropy.swift; sourceTree = "<group>"; };
8375
E287ADBA1F0EE3B600DE6DF5 /* EntropyString.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; path = EntropyString.playground; sourceTree = "<group>"; };
8476
E2B8C8161F3E2B8500171502 /* CharSet.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CharSet.swift; sourceTree = "<group>"; };
8577
E2DDF28D1F68EDD700CE76CB /* Presentation.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; name = Presentation.playground; path = ../Presentation/Swift/Presentation.playground; sourceTree = "<group>"; };
86-
E2E1404E1F422C70003E02B0 /* EntropyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EntropyTests.swift; sourceTree = "<group>"; };
8778
E2E140521F422FB1003E02B0 /* RandomTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RandomTests.swift; sourceTree = "<group>"; };
8879
E2E140561F433A6D003E02B0 /* CharSetTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CharSetTests.swift; sourceTree = "<group>"; };
8980
/* End PBXFileReference section */
@@ -182,7 +173,6 @@
182173
E287ADAC1F0E9DDC00DE6DF5 /* Sources */ = {
183174
isa = PBXGroup;
184175
children = (
185-
E287ADB81F0EDCAB00DE6DF5 /* Entropy.swift */,
186176
E2B8C8161F3E2B8500171502 /* CharSet.swift */,
187177
E287ADAD1F0E9DDC00DE6DF5 /* Random.swift */,
188178
E21779C71F3E7E260019CC42 /* Bytes.swift */,
@@ -201,7 +191,6 @@
201191
E287ADB01F0E9DEE00DE6DF5 /* EntropyStringTests */ = {
202192
isa = PBXGroup;
203193
children = (
204-
E2E1404E1F422C70003E02B0 /* EntropyTests.swift */,
205194
E2E140561F433A6D003E02B0 /* CharSetTests.swift */,
206195
E2E140521F422FB1003E02B0 /* RandomTests.swift */,
207196
E287ADB21F0E9DEE00DE6DF5 /* Info.plist */,
@@ -498,7 +487,6 @@
498487
buildActionMask = 2147483647;
499488
files = (
500489
E205C6BC1F1BFFEC007C139E /* Random.swift in Sources */,
501-
E205C6BD1F1BFFEC007C139E /* Entropy.swift in Sources */,
502490
E2D2DD481F436AB6006846F7 /* CharSet.swift in Sources */,
503491
E21779C91F3E7E260019CC42 /* Bytes.swift in Sources */,
504492
);
@@ -509,7 +497,6 @@
509497
buildActionMask = 2147483647;
510498
files = (
511499
E2E140541F422FB1003E02B0 /* RandomTests.swift in Sources */,
512-
E2E140501F422C70003E02B0 /* EntropyTests.swift in Sources */,
513500
E2E140581F433A6D003E02B0 /* CharSetTests.swift in Sources */,
514501
);
515502
runOnlyForDeploymentPostprocessing = 0;
@@ -519,7 +506,6 @@
519506
buildActionMask = 2147483647;
520507
files = (
521508
E205C6F91F1C030E007C139E /* Random.swift in Sources */,
522-
E205C6FA1F1C030E007C139E /* Entropy.swift in Sources */,
523509
E2D2DD491F436AB7006846F7 /* CharSet.swift in Sources */,
524510
E21779CA1F3E7E260019CC42 /* Bytes.swift in Sources */,
525511
);
@@ -530,7 +516,6 @@
530516
buildActionMask = 2147483647;
531517
files = (
532518
E2E140551F422FB1003E02B0 /* RandomTests.swift in Sources */,
533-
E2E140511F422C70003E02B0 /* EntropyTests.swift in Sources */,
534519
E2E140591F433A6D003E02B0 /* CharSetTests.swift in Sources */,
535520
);
536521
runOnlyForDeploymentPostprocessing = 0;
@@ -540,7 +525,6 @@
540525
buildActionMask = 2147483647;
541526
files = (
542527
E21779CB1F3E7E260019CC42 /* Bytes.swift in Sources */,
543-
E2D2DD461F436A5F006846F7 /* Entropy.swift in Sources */,
544528
E2D2DD4A1F436AB8006846F7 /* CharSet.swift in Sources */,
545529
E2D2DD471F436AB3006846F7 /* Random.swift in Sources */,
546530
);
@@ -552,7 +536,6 @@
552536
files = (
553537
E2B8C8171F3E2B8500171502 /* CharSet.swift in Sources */,
554538
E287ADAE1F0E9DDC00DE6DF5 /* Random.swift in Sources */,
555-
E287ADB91F0EDCAB00DE6DF5 /* Entropy.swift in Sources */,
556539
E21779C81F3E7E260019CC42 /* Bytes.swift in Sources */,
557540
);
558541
runOnlyForDeploymentPostprocessing = 0;
@@ -562,7 +545,6 @@
562545
buildActionMask = 2147483647;
563546
files = (
564547
E2E140531F422FB1003E02B0 /* RandomTests.swift in Sources */,
565-
E2E1404F1F422C70003E02B0 /* EntropyTests.swift in Sources */,
566548
E2E140571F433A6D003E02B0 /* CharSetTests.swift in Sources */,
567549
);
568550
runOnlyForDeploymentPostprocessing = 0;

README.md

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -148,23 +148,23 @@ Or perhaps you need a 256 bit token using [RFC 4648](https://tools.ietf.org/html
148148

149149
### <a name="Overview"></a>Overview
150150

151-
`EntropyString` provides easy creation of randomly generated strings of specific entropy using various character sets. Such strings are needed as unique identifiers when generating, for example, random IDs and you don't want the overkill of a GUID.
151+
`EntropyString` provides easy creation of randomly generated strings of specific entropy using various character sets. Such strings are needed as unique identifiers when generating, for example, random IDs and you don't want the overkill of a UUID.
152152

153-
A key concern when generating such strings is that they be unique. Guaranteed uniqueness, however,, requires either deterministic generation (e.g., a counter) that is not random, or that each newly created random string be compared against all existing strings. When ramdoness is required, the overhead of storing and comparing strings is often too onerous and a different tack is chosen.
153+
A key concern when generating such strings is that they be unique. Guaranteed uniqueness, however, requires either deterministic generation (e.g., a counter) that is not random, or that each newly created random string be compared against all existing strings. When randomness is required, the overhead of storing and comparing strings is often too onerous and a different tack is chosen.
154154

155-
A common strategy is to replace the *guarantee of uniqueness* with a weaker but often sufficient *probabilistic uniqueness*. Specifically, rather than being absolutely sure of uniqueness, we settle for a statement such as *"there is less than a 1 in a billion chance that two of my strings are the same"*. This strategy requires much less overhead, but does require we have some manner of qualifying what we mean by *"there is less than a 1 in a billion chance that 1 million strings of this form will have a repeat"*.
155+
A common strategy is to replace the **_guarantee of uniqueness_** with a weaker but often sufficient one of **_probabilistic uniqueness_**. Specifically, rather than being absolutely sure of uniqueness, we settle for a statement such as *"there is less than a 1 in a billion chance that two of my strings are the same"*. We use an implicit version of this very strategy every time we use a hash set, where the keys are formed from taking the hash of some value. We *assume* there will be no hash collision using our values, but we **do not** have any true guarantee of uniqueness per se.
156156

157-
Understanding probabilistic uniqueness of random strings requires an understanding of [*entropy*](https://en.wikipedia.org/wiki/Entropy_(information_theory)) and of estimating the probability of a [*collision*](https://en.wikipedia.org/wiki/Birthday_problem#Cast_as_a_collision_problem) (i.e., the probability that two strings in a set of randomly generated strings might be the same). The blog posting [Hash Collision Probabilities](http://preshing.com/20110504/hash-collision-probabilities/) provides an excellent overview of deriving an expression for calculating the probability of a collision in some number of hashes using a perfect hash with an N-bit output. Thef [Entropy Bits](#EntropyBits) section below discribes how `EntropyString` takes this idea a step further to address a common need in generating unique identifiers.
157+
Fortunately, a probabilistic uniqueness strategy requires much less overhead than guaranteed uniqueness. But it does require we have some manner of qualifying what we mean by *"there is less than a 1 in a billion chance that 1 million strings of this form will have a repeat"*.
158158

159-
We'll begin investigating `EntropyString` by considering our [Real Need](Read%20Need) when generating random strings.
159+
Understanding probabilistic uniqueness of random strings requires an understanding of [*entropy*](https://en.wikipedia.org/wiki/Entropy_(information_theory)) and of estimating the probability of a [*collision*](https://en.wikipedia.org/wiki/Birthday_problem#Cast_as_a_collision_problem) (i.e., the probability that two strings in a set of randomly generated strings might be the same). The blog post [Hash Collision Probabilities](http://preshing.com/20110504/hash-collision-probabilities/) provides an excellent overview of deriving an expression for calculating the probability of a collision in some number of hashes using a perfect hash with an N-bit output. This is sufficient for understanding the probability of collision given a hash with a **fixed** output of N-bits, but does not provide an answer to qualifying what we mean by *"there is less than a 1 in a billion chance that 1 million strings of this form will have a repeat"*. The [Entropy Bits](#EntropyBits) section below describes how `EntropyString` provides this qualifying measure.
160+
161+
We'll begin investigating `EntropyString` by considering the [Real Need](#RealNeed) when generating random strings.
160162

161163
[TOC](#TOC)
162164

163165
### <a name="RealNeed"></a>Real Need
164166

165-
Let's start by reflecting on a common statement of need for developers, who might say:
166-
167-
*I need random strings 16 characters long.*
167+
Let's start by reflecting on the common statement: *I need random strings 16 characters long.*
168168

169169
Okay. There are libraries available that address that exact need. But first, there are some questions that arise from the need as stated, such as:
170170

@@ -180,27 +180,29 @@ As for question 2, the developer might respond:
180180

181181
*I need 10,000 of these things*.
182182

183-
Ah, now we're getting somewhere. The answer to question 3 might lead to the further qualification:
183+
Ah, now we're getting somewhere. The answer to question 3 might lead to a further qualification:
184184

185185
*I need to generate 10,000 random, unique IDs*.
186186

187-
And the cat's out of the bag. We're getting at the real need, and it's not the same as the original statement. The developer needs *uniqueness* across some potential number of strings. The length of the string is a by-product of the uniqueness, not the goal, and should not be the primary specification for the random string.
187+
And the cat's out of the bag. We're getting at the real need, and it's not the same as the original statement. The developer needs *uniqueness* across a total of some number of strings. The length of the string is a by-product of the uniqueness, not the goal, and should not be the primary specification for the random string.
188188

189-
As noted in the [Overview](Overview), guaranteeing uniqueness is difficult, so we'll replace that declaration with one of *probabilistic uniqueness* by asking:
189+
As noted in the [Overview](#Overview), guaranteeing uniqueness is difficult, so we'll replace that declaration with one of *probabilistic uniqueness* by asking a fourth question:
190190

191-
- What risk of a repeat are you willing to accept?
191+
<ol start=4>
192+
<li>What risk of a repeat are you willing to accept?</li>
193+
</ol>
192194

193-
Probabilistic uniqueness contains risk. That's the price we pay for giving up on the stronger declaration of strict uniqueness. But the developer can quantify an appropriate risk for a particular scenario with a statement like:
195+
Probabilistic uniqueness contains risk. That's the price we pay for giving up on the stronger declaration of guaranteed uniqueness. But the developer can quantify an appropriate risk for a particular scenario with a statement like:
194196

195197
*I guess I can live with a 1 in a million chance of a repeat*.
196198

197-
So now we've gotten to the developer's real need:
199+
So now we've finally gotten to the developer's real need:
198200

199201
*I need 10,000 random hexadecimal IDs with less than 1 in a million chance of any repeats*.
200202

201203
Not only is this statement more specific, there is no mention of string length. The developer needs probabilistic uniqueness, and strings are to be used to capture randomness for this purpose. As such, the length of the string is simply a by-product of the encoding used to represent the required uniqueness as a string.
202204

203-
How do you address this need using a library designed to generate strings of specified length? Well, you don't directly, because that library was designed to answer the originally stated need, not the real need we've uncovered. We need a library that deals with probabilistic uniqueness of a total number of some strings. And that's exactly what `EntropyString` does.
205+
How do you address this need using a library designed to generate strings of specified length? Well, you don't, because that library was designed to answer the originally stated need, not the real need we've uncovered. We need a library that deals with probabilistic uniqueness of a total number of some strings. And that's exactly what `EntropyString` does.
204206

205207
Let's use `EntropyString` to help this developer generate 5 hexadecimal IDs from a pool of a potential 10,000 IDs with a 1 in a million chance of a repeat:
206208

Sources/CharSet.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ public struct CharSet {
7979

8080
self.chars = chars
8181
bitsPerChar = UInt8(log2(Float(length)))
82-
charsPerChunk = CharSet.lcm(bitsPerChar, Entropy.bitsPerByte) / bitsPerChar
82+
charsPerChunk = CharSet.lcm(bitsPerChar, Random.bitsPerByte) / bitsPerChar
8383

84-
if CharSet.lcm(bitsPerChar, Entropy.bitsPerByte) == Entropy.bitsPerByte {
84+
if CharSet.lcm(bitsPerChar, Random.bitsPerByte) == Random.bitsPerByte {
8585
ndxFn = CharSet.ndxFnForDivisor(bitsPerChar)
8686
}
8787
else {
@@ -93,7 +93,7 @@ public struct CharSet {
9393
///
9494
public func bytesNeeded(bits: Float) -> Int {
9595
let count = ceil(bits / Float(bitsPerChar))
96-
return Int(ceil(count * Float(bitsPerChar) / Float(Entropy.bitsPerByte)))
96+
return Int(ceil(count * Float(bitsPerChar) / Float(Random.bitsPerByte)))
9797
}
9898

9999
/// Determines index into `CharSet` characters when base is a multiple of 8.
@@ -111,7 +111,7 @@ public struct CharSet {
111111
private static func ndxFnForDivisor(_ bitsPerChar: UInt8) -> NdxFn {
112112
func ndxFn(bytes: [UInt8], chunk: Int, slice: UInt8) -> Ndx {
113113
let lShift = UInt8(bitsPerChar)
114-
let rShift = Entropy.bitsPerByte - bitsPerChar
114+
let rShift = Random.bitsPerByte - bitsPerChar
115115
return (bytes[chunk]<<UInt8(slice*lShift))>>rShift
116116
}
117117
return ndxFn
@@ -131,7 +131,7 @@ public struct CharSet {
131131
/// - return: A function to index into the `CharSet` characters.
132132
private static func ndxFnForNonDivisor(_ bitsPerChar: UInt8) -> NdxFn {
133133
func ndxFn(bytes: [UInt8], chunk: Int, slice: UInt8) -> Ndx {
134-
let bitsPerByte = Entropy.bitsPerByte
134+
let bitsPerByte = Random.bitsPerByte
135135
let slicesPerChunk = lcm(bitsPerChar, bitsPerByte) / bitsPerByte
136136
let bNum = chunk * Int(slicesPerChunk)
137137

0 commit comments

Comments
 (0)