1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.networkstack.ipmemorystore;
18 
19 import com.android.internal.annotations.VisibleForTesting;
20 
21 /**
22  * A class containing the logic around the relevance value for
23  * IP Memory Store.
24  *
25  * @hide
26  */
27 public final class RelevanceUtils {
28     /**
29      * The relevance is a decaying value that gets lower and lower until it
30      * reaches 0 after some time passes. It follows an exponential decay law,
31      * dropping slowly at first then faster and faster, because a network is
32      * likely to be visited again if it was visited not long ago, and the longer
33      * it hasn't been visited the more likely it is that it won't be visited
34      * again. For example, a network visited on holiday should stay fresh for
35      * the duration of the holiday and persist for a while, but after the venue
36      * hasn't been visited for a while it should quickly be discarded. What
37      * should accelerate forgetting the network is extended periods without
38      * visits, so that occasional venues get discarded but regular visits keep
39      * the network relevant, even if the visits are infrequent.
40      *
41      * This function must be stable by iteration, meaning that adjusting the same value
42      * for different dates iteratively multiple times should give the same result.
43      * Formally, if f is the decay function that associates a relevance x at a date d1
44      * to the value at ulterior date d3, then for any date d2 between d1 and d3 :
45      * f(x, d3 - d1) = f(f(x, d3 - d2), d2 - d1). Intuitively, this property simply
46      * means it should be the same to compute and store back the value after two months,
47      * or to do it once after one month, store it back, and do it again after another
48      * months has passed.
49      * The pair of the relevance and date define the entire curve, so any pair
50      * of values on the curve will define the same curve. Setting one of them to a
51      * constant, so as not to have to store it, means the other one will always suffice
52      * to describe the curve. For example, only storing the date for a known, constant
53      * value of the relevance is an efficient way of remembering this information (and
54      * to compare relevances together, as f is monotonically decreasing).
55      *
56      *** Choosing the function :
57      * Functions of the kind described above are standard exponential decay functions
58      * like the ones that govern atomic decay where the value at any given date can be
59      * computed uniformly from the value at a previous date and the time elapsed since
60      * that date. It is simple to picture this kind of function as one where after a
61      * given period of time called the half-life, the relevance value will have been
62      * halved. Decay of this kind is expressed in function of the previous value by
63      * functions like
64      * f(x, t) = x * F ^ (t / L)
65      * ...where x is the value, t is the elapsed time, L is the half-life (or more
66      * generally the F-th-life) and F the decay factor (typically 0.5, hence why L is
67      * usually called the half-life). The ^ symbol here is used for exponentiation.
68      * Or, starting at a given M for t = 0 :
69      * f(t) = M * F ^ (t / L)
70      *
71      * Because a line in the store needs to become irrelevant at some point but
72      * this class of functions never go to 0, a minimum cutoff has to be chosen to
73      * represent irrelevance. The simpler way of doing this is to simply add this
74      * minimum cutoff to the computation before and removing it after.
75      * Thus the function becomes :
76      * f(x, t) = ((x + K) * F ^ (t / L)) - K
77      * ...where K is the minimum cutoff, L the half-life, and F the factor between
78      * the original x and x after its half-life. Strictly speaking using the word
79      * "half-life" implies that F = 0.5, but the relation works for any value of F.
80      *
81      * It is easy enough to check that this function satisfies the stability
82      * relation that was given above for any value of F, L and K, which become
83      * parameters that can be defined at will.
84      *
85      * relevance
86      *  1.0 |
87      *      |\
88      *      | \
89      *      |  \            (this graph rendered with L = 75 days and K = 1/40)
90      *  0.75|   ',
91      *      |     \
92      *      |      '.
93      *      |        \.
94      *      |          \
95      *  0.5 |           '\
96      *      |             ''.
97      *      |                ''.
98      *      |                   ''.
99      *  0.25|                      '''..
100      *      |                           '''..
101      *      |                                ''''....
102      *      |                                        '''''..........
103      *    0 +-------------------------------------------------------''''''''''----
104      *      0       50       100      150     200      250     300      350     400 days
105      *
106      *** Choosing the parameters
107      * The maximum M is an arbitrary parameter that simply scales the curve.
108      * The tradeoff for M is pretty simple : if the relevance is going to be an
109      * integer, the bigger M is the more precision there is in the relevance.
110      * However, values of M that are easy for humans to read are preferable to
111      * help debugging, and a suitably low value may be enough to ensure there
112      * won't be integer overflows in intermediate computations.
113      * A value of 1_000_000 probably is plenty for precision, while still in the
114      * low range of what ints can represent.
115      *
116      * F and L are parameters to be chosen arbitrarily and have an impact on how
117      * fast the relevance will be decaying at first, keeping in mind that
118      * the 400 days value and the cap stay the same. In simpler words, F and L
119      * define the steepness of the curve.
120      * To keep things simple (and familiar) F is arbitrarily chosen to be 0.5, and
121      * L is set to 200 days visually to achieve the desired effect. Refer to the
122      * illustration above to get a feel of how that feels.
123      *
124      * Moreover, the memory store works on an assumption that the relevance should
125      * be capped, and that an entry with capped relevance should decay in 400 days.
126      * This is on premises that the networks a device will need to remember the
127      * longest should be networks visited about once a year.
128      * For this reason, the relevance is at the maximum M 400 days before expiry :
129      * f(M, 400 days) = 0
130      * From replacing this with the value of the function, K can then be derived
131      * from the values of M, F and L :
132      * (M + K) * F ^ (t / L) - K = 0
133      * K = M * F ^ (400 days / L) / (1 - F ^ (400 days / L))
134      * Replacing with actual values this gives :
135      * K = 1_000_000 * 0.5 ^ (400 / 200) / (1 - 0.5 ^ (400 / 200))
136      *   = 1_000_000 / 3 ≈ 333_333.3
137      * This ensures the function has the desired profile, the desired value at
138      * cap, and the desired value at expiry.
139      *
140      *** Useful relations
141      * Let's define the expiry time for any given relevance x as the interval of
142      * time such as :
143      * f(x, expiry) = 0
144      * which can be rewritten
145      * ((x + K) * F ^ (expiry / L)) = K
146      * ...giving an expression of the expiry in function of the relevance x as
147      * expiry = L * logF(K / (x + K))
148      * Conversely the relevance x can be expressed in function of the expiry as
149      * x = K / F ^ (expiry / L) - K
150      * These relations are useful in utility functions.
151      *
152      *** Bumping things up
153      * The last issue therefore is to decide how to bump up the relevance. The
154      * simple approach is to simply lift up the curve a little bit by a constant
155      * normalized amount, delaying the time of expiry. For example increasing
156      * the relevance by an amount I gives :
157      * x2 = x1 + I
158      * x2 and x1 correspond to two different expiry times expiry2 and expiry1,
159      * and replacing x1 and x2 in the relation above with their expression in
160      * function of the expiry comes :
161      * K / F ^ (expiry2 / L) - K = K / F ^ (expiry1 / L) - K + I
162      * which resolves to :
163      * expiry2 = L * logF(K / (I + K / F ^ (expiry1 / L)))
164      *
165      * In this implementation, the bump is defined as 1/25th of the cap for
166      * the relevance. This means a network will be remembered for the maximum
167      * period of 400 days if connected 25 times in succession not accounting
168      * for decay. Of course decay actually happens so it will take more than 25
169      * connections for any given network to actually reach the cap, but because
170      * decay is slow at first, it is a good estimate of how fast cap happens.
171      *
172      * Specifically, it gives the following four results :
173      * - A network that a device connects to once hits irrelevance about 32.7 days after
174      *   it was first registered if never connected again.
175      * - A network that a device connects to once a day at a fixed hour will hit the cap
176      *   on the 27th connection.
177      * - A network that a device connects to once a week at a fixed hour will hit the cap
178      *   on the 57th connection.
179      * - A network that a device connects to every day for 7 straight days then never again
180      *   expires 144 days after the last connection.
181      * These metrics tend to match pretty well the requirements.
182      */
183 
184     // TODO : make these constants configurable at runtime. Don't forget to build it so that
185     // changes will wipe the database, migrate the values, or otherwise make sure the relevance
186     // values are still meaningful.
187 
188     // How long, in milliseconds, is a capped relevance valid for, or in other
189     // words how many milliseconds after its relevance was set to RELEVANCE_CAP does
190     // any given line expire. 400 days.
191     @VisibleForTesting
192     public static final long CAPPED_RELEVANCE_LIFETIME_MS = 400L * 24 * 60 * 60 * 1000;
193 
194     // The constant that represents a normalized 1.0 value for the relevance. In other words,
195     // the cap for the relevance. This is referred to as M in the explanation above.
196     @VisibleForTesting
197     public static final int CAPPED_RELEVANCE = 1_000_000;
198 
199     // The decay factor. After a half-life, the relevance will have decayed by this value.
200     // This is referred to as F in the explanation above.
201     private static final double DECAY_FACTOR = 0.5;
202 
203     // The half-life. After this time, the relevance will have decayed by a factor DECAY_FACTOR.
204     // This is referred to as L in the explanation above.
205     private static final long HALF_LIFE_MS = 200L * 24 * 60 * 60 * 1000;
206 
207     // The value of the frame change. This is referred to as K in the explanation above.
208     private static final double IRRELEVANCE_FLOOR =
209             CAPPED_RELEVANCE * powF((double) CAPPED_RELEVANCE_LIFETIME_MS / HALF_LIFE_MS)
210             / (1 - powF((double) CAPPED_RELEVANCE_LIFETIME_MS / HALF_LIFE_MS));
211 
212     // How much to bump the relevance by every time a line is written to.
213     @VisibleForTesting
214     public static final int RELEVANCE_BUMP = CAPPED_RELEVANCE / 25;
215 
216     // Java doesn't include a function for the logarithm in an arbitrary base, so implement it
217     private static final double LOG_DECAY_FACTOR = Math.log(DECAY_FACTOR);
logF(final double value)218     private static double logF(final double value) {
219         return Math.log(value) / LOG_DECAY_FACTOR;
220     }
221 
222     // Utility function to get a power of the decay factor, to simplify the code.
powF(final double value)223     private static double powF(final double value) {
224         return Math.pow(DECAY_FACTOR, value);
225     }
226 
227     /**
228      * Compute the value of the relevance now given an expiry date.
229      *
230      * @param expiry the date at which the column in the database expires.
231      * @return the adjusted value of the relevance for this moment in time.
232      */
computeRelevanceForNow(final long expiry)233     public static int computeRelevanceForNow(final long expiry) {
234         return computeRelevanceForTargetDate(expiry, System.currentTimeMillis());
235     }
236 
237     /**
238      * Compute the value of the relevance at a given date from an expiry date.
239      *
240      * Because relevance decays with time, a relevance in the past corresponds to
241      * a different relevance later.
242      *
243      * Relevance is always a positive value. 0 means not relevant at all.
244      *
245      * See the explanation at the top of this file to get the justification for this
246      * computation.
247      *
248      * @param expiry the date at which the column in the database expires.
249      * @param target the target date to adjust the relevance to.
250      * @return the adjusted value of the relevance for the target moment.
251      */
computeRelevanceForTargetDate(final long expiry, final long target)252     public static int computeRelevanceForTargetDate(final long expiry, final long target) {
253         final long delay = expiry - target;
254         if (delay >= CAPPED_RELEVANCE_LIFETIME_MS) return CAPPED_RELEVANCE;
255         if (delay <= 0) return 0;
256         return (int) (IRRELEVANCE_FLOOR / powF((float) delay / HALF_LIFE_MS) - IRRELEVANCE_FLOOR);
257     }
258 
259     /**
260      * Compute the expiry duration adjusted up for a new fresh write.
261      *
262      * Every time data is written to the memory store for a given line, the
263      * relevance is bumped up by a certain amount, which will boost the priority
264      * of this line for computation of group attributes, and delay (possibly
265      * indefinitely, if the line is accessed regularly) forgetting the data stored
266      * in that line.
267      * As opposed to bumpExpiryDate, this function uses a duration from now to expiry.
268      *
269      * See the explanation at the top of this file for a justification of this computation.
270      *
271      * @param oldExpiryDuration the old expiry duration in milliseconds from now.
272      * @return the expiry duration representing a bumped up relevance value.
273      */
bumpExpiryDuration(final long oldExpiryDuration)274     public static long bumpExpiryDuration(final long oldExpiryDuration) {
275         // L * logF(K / (I + K / F ^ (expiry1 / L))), as documented above
276         final double divisionFactor = powF(((double) oldExpiryDuration) / HALF_LIFE_MS);
277         final double oldRelevance = IRRELEVANCE_FLOOR / divisionFactor;
278         final long newDuration =
279                 (long) (HALF_LIFE_MS * logF(IRRELEVANCE_FLOOR / (RELEVANCE_BUMP + oldRelevance)));
280         return Math.min(newDuration, CAPPED_RELEVANCE_LIFETIME_MS);
281     }
282 
283     /**
284      * Compute the new expiry date adjusted up for a new fresh write.
285      *
286      * Every time data is written to the memory store for a given line, the
287      * relevance is bumped up by a certain amount, which will boost the priority
288      * of this line for computation of group attributes, and delay (possibly
289      * indefinitely, if the line is accessed regularly) forgetting the data stored
290      * in that line.
291      * As opposed to bumpExpiryDuration, this function takes the old timestamp and returns the
292      * new timestamp.
293      *
294      * {@see bumpExpiryDuration}, and keep in mind that the bump depends on when this is called,
295      * because the relevance decays exponentially, therefore bumping up a high relevance (for a
296      * date far in the future) is less potent than bumping up a low relevance (for a date in
297      * a close future).
298      *
299      * @param oldExpiryDate the old date of expiration.
300      * @return the new expiration date after the relevance bump.
301      */
bumpExpiryDate(final long oldExpiryDate)302     public static long bumpExpiryDate(final long oldExpiryDate) {
303         final long now = System.currentTimeMillis();
304         final long newDuration = bumpExpiryDuration(oldExpiryDate - now);
305         return now + newDuration;
306     }
307 
308     // This utility class is not publicly instantiable.
RelevanceUtils()309     private RelevanceUtils() {}
310 }
311