/
data_source.rb
445 lines (397 loc) · 17.9 KB
/
data_source.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
# encoding: UTF-8
# frozen_string_literal: true
require 'concurrent'
require 'thread'
module TZInfo
# Raised when the selected {DataSource} does not implement one of the methods
# that every data source is required to provide.
class InvalidDataSource < StandardError; end
# Raised when no data source could be found, i.e. when `'tzinfo/data'` is not
# on the load path and no valid zoneinfo directory exists on the system.
class DataSourceNotFound < StandardError; end
# TZInfo can be used with different data sources for time zone and country
# data. Each source of data is implemented as a subclass of {DataSource}.
#
# To choose a data source and override the default selection, use the
# {DataSource.set} method.
#
# @abstract To create a custom data source, create a subclass of {DataSource}
# and implement the {load_timezone_info}, {data_timezone_identifiers},
# {linked_timezone_identifiers}, {load_country_info} and {country_codes}
# methods.
class DataSource
# The currently selected data source. Starts out as `nil`; it is assigned by
# {DataSource.set}, which {DataSource.get} calls with a default data source
# on first use if nothing has been selected.
#
# @private
@@instance = nil
# A `Mutex` used to ensure the default data source is only created once.
# {DataSource.get} holds it while creating and storing the default.
#
# @private
@@default_mutex = Mutex.new
class << self
# Returns the data source currently in use, creating the default data source
# first if none has been selected yet.
#
# @return [DataSource] the currently selected source of data.
def get
  # The default data source is created on the first request rather than when
  # TZInfo is loaded, to avoid unnecessary attempts to find a suitable
  # DataSource when one has been set manually.
  #
  # The check is repeated while holding the `Mutex` so that only a single
  # default instance is created (thus avoiding the possibility of retaining
  # two copies of the same data in memory).
  unless @@instance
    @@default_mutex.synchronize { @@instance || set(create_default_data_source) }
  end

  @@instance
end
# Sets the currently selected data source for time zone and country data.
#
# This should usually be set to one of the two standard data source types:
#
# * `:ruby` - read data from the Ruby modules included in the TZInfo::Data
# library (tzinfo-data gem).
# * `:zoneinfo` - read data from the zoneinfo files included with most
# Unix-like operating systems (e.g. in /usr/share/zoneinfo).
#
# To set TZInfo to use one of the standard data source types, call
# `TZInfo::DataSource.set` in one of the following ways:
#
# TZInfo::DataSource.set(:ruby)
# TZInfo::DataSource.set(:zoneinfo)
# TZInfo::DataSource.set(:zoneinfo, zoneinfo_dir)
# TZInfo::DataSource.set(:zoneinfo, zoneinfo_dir, iso3166_tab_file)
#
# `DataSource.set(:zoneinfo)` will automatically search for the zoneinfo
# directory by checking the paths specified in
# {DataSources::ZoneinfoDataSource.search_path}.
# {DataSources::ZoneinfoDirectoryNotFound} will be raised if no valid
# zoneinfo directory could be found.
#
# `DataSource.set(:zoneinfo, zoneinfo_dir)` uses the specified
# `zoneinfo_dir` directory as the data source. If the directory is not a
# valid zoneinfo directory, a {DataSources::InvalidZoneinfoDirectory}
# exception will be raised.
#
# `DataSource.set(:zoneinfo, zoneinfo_dir, iso3166_tab_file)` uses the
# specified `zoneinfo_dir` directory as the data source, but loads the
# `iso3166.tab` file from the path given by `iso3166_tab_file`. If the
# directory is not a valid zoneinfo directory, a
# {DataSources::InvalidZoneinfoDirectory} exception will be raised.
#
# Custom data sources can be created by subclassing TZInfo::DataSource and
# implementing the following methods:
#
# * {load_timezone_info}
# * {data_timezone_identifiers}
# * {linked_timezone_identifiers}
# * {load_country_info}
# * {country_codes}
#
# To have TZInfo use the custom data source, call {DataSource.set},
# passing an instance of the custom data source implementation as follows:
#
# TZInfo::DataSource.set(CustomDataSource.new)
#
# Calling {DataSource.set} will only affect instances of {Timezone} and
# {Country} obtained with {Timezone.get} and {Country.get} subsequent to
# the {DataSource.set} call. Existing {Timezone} and {Country} instances
# will be unaffected.
#
# If {DataSource.set} is not called, TZInfo will by default attempt to use
# TZInfo::Data as the data source. If TZInfo::Data is not available (i.e.
# if `require 'tzinfo/data'` fails), then TZInfo will search for a
# zoneinfo directory instead (using the search path specified by
# {DataSources::ZoneinfoDataSource.search_path}).
#
# @param data_source_or_type [Object] either `:ruby`, `:zoneinfo` or an
# instance of a {DataSource}.
# @param args [Array<Object>] when `data_source_or_type` is a symbol,
# optional arguments to use when initializing the data source.
# @raise [ArgumentError] if `data_source_or_type` is not `:ruby`,
# `:zoneinfo` or an instance of {DataSource}.
def set(data_source_or_type, *args)
  # Work out which data source is being requested first. The current
  # selection is only replaced once that has succeeded, so an invalid
  # argument (or a failing constructor) leaves it untouched.
  @@instance =
    if data_source_or_type.kind_of?(DataSource)
      data_source_or_type
    elsif data_source_or_type == :ruby
      DataSources::RubyDataSource.new
    elsif data_source_or_type == :zoneinfo
      # Any extra arguments are handed to the zoneinfo data source unchanged.
      DataSources::ZoneinfoDataSource.new(*args)
    else
      raise ArgumentError, 'data_source_or_type must be a DataSource instance or a data source type (:ruby or :zoneinfo)'
    end
end
private
# Builds the {DataSource} that is used when no preference has been specified
# manually. The Ruby module data source is chosen if `'tzinfo/data'` can be
# required; otherwise a zoneinfo data source is created.
#
# @return [DataSource] the newly created default {DataSource} instance.
# @raise [DataSourceNotFound] if `'tzinfo/data'` cannot be required and no
#   zoneinfo directory could be found.
def create_default_data_source
  tzinfo_data_available =
    begin
      require 'tzinfo/data'
      true
    rescue LoadError
      false
    end

  # The Ruby data source is constructed outside of the rescue above so that
  # only a failure of the `require` itself selects the zoneinfo fallback.
  if tzinfo_data_available
    DataSources::RubyDataSource.new
  else
    begin
      DataSources::ZoneinfoDataSource.new
    rescue DataSources::ZoneinfoDirectoryNotFound
      raise DataSourceNotFound, "No source of timezone data could be found.\nPlease refer to https://tzinfo.github.io/datasourcenotfound for help resolving this error."
    end
  end
end
end
# Initializes a new {DataSource} instance. Typically only called via
# subclasses of {DataSource}.
def initialize
# Cache of the instances returned by get_timezone_info, keyed by the
# identifier reported by each loaded instance.
@timezones = Concurrent::Map.new
end
# Returns the {DataSources::TimezoneInfo} for a time zone identifier. The
# result derives from either {DataSources::DataTimezoneInfo} (time zones that
# define their own data) or {DataSources::LinkedTimezoneInfo} (links or
# aliases to other time zones).
#
# The first request for an identifier is passed to {load_timezone_info}. The
# instance that comes back is cached under the identifier it reports and is
# returned by later calls to {get_timezone_info} for that identifier.
#
# @param identifier [String] A time zone identifier.
# @return [DataSources::TimezoneInfo] a {DataSources::TimezoneInfo} instance
#   for a given identifier.
# @raise [InvalidTimezoneIdentifier] if the time zone is not found or the
#   identifier is invalid.
def get_timezone_info(identifier)
  cached = @timezones[identifier]
  return cached if cached

  # Thread-safety: concurrently executing threads may each reach this point
  # for the same identifier and create equivalent TimezoneInfo instances, so
  # the data may be loaded more than once (depending on the data source).
  # Guaranteeing a single instance is unlikely to be worth the overhead of
  # only allowing one TimezoneInfo to be loaded at a time.
  info = load_timezone_info(identifier)
  @timezones[info.identifier] = info
  info
end
# Returns every available time zone identifier, as produced by combining
# {data_timezone_identifiers} and {linked_timezone_identifiers}. The `Array`
# is built on first use and reused by later calls.
#
# @return [Array<String>] a frozen `Array` of all the available time zone
#   identifiers. The identifiers are sorted according to `String#<=>`.
def timezone_identifiers
# Thread-safety: It is possible that the value of @timezone_identifiers
# may be calculated multiple times in concurrently executing threads. It
# is not worth the overhead of locking to ensure that
# @timezone_identifiers is only calculated once.
@timezone_identifiers ||= build_timezone_identifiers
end
# Returns a frozen `Array` of all the available time zone identifiers for
# data time zones (i.e. those that actually contain definitions). The
# identifiers are sorted according to `String#<=>`.
#
# This base implementation only reports that the method has not been
# provided; data sources are expected to override it.
#
# @return [Array<String>] a frozen `Array` of all the available time zone
#   identifiers for data time zones.
# @raise [InvalidDataSource] if the data source has not overridden this
#   method.
def data_timezone_identifiers
raise_invalid_data_source('data_timezone_identifiers')
end
# Returns a frozen `Array` of all the available time zone identifiers that
# are links to other time zones. The identifiers are sorted according to
# `String#<=>`.
#
# This base implementation only reports that the method has not been
# provided; data sources are expected to override it.
#
# @return [Array<String>] a frozen `Array` of all the available time zone
#   identifiers that are links to other time zones.
# @raise [InvalidDataSource] if the data source has not overridden this
#   method.
def linked_timezone_identifiers
raise_invalid_data_source('linked_timezone_identifiers')
end
# Returns the country information for a country code by delegating directly
# to {load_country_info}. No caching is performed by this method.
#
# @param code [String] an ISO 3166-1 alpha-2 country code.
# @return [DataSources::CountryInfo] a {DataSources::CountryInfo} instance
#   for the given ISO 3166-1 alpha-2 country code.
# @raise [InvalidCountryCode] if the country could not be found or the code
#   is invalid.
def get_country_info(code)
load_country_info(code)
end
# Returns a frozen `Array` of all the available ISO 3166-1 alpha-2 country
# codes. The identifiers are sorted according to `String#<=>`.
#
# This base implementation only reports that the method has not been
# provided; data sources are expected to override it.
#
# @return [Array<String>] a frozen `Array` of all the available ISO 3166-1
#   alpha-2 country codes.
# @raise [InvalidDataSource] if the data source has not overridden this
#   method.
def country_codes
raise_invalid_data_source('country_codes')
end
# Loads the data for every time zone identifier and every country code. This
# may be desirable in production environments to improve copy-on-write
# performance and to avoid flushing the constant cache every time a new
# timezone or country is loaded from {DataSources::RubyDataSource}.
#
# Each identifier and code is passed to {load_timezone_info} or
# {load_country_info} respectively; the values they return are not kept by
# this method.
#
# @return [nil]
def preload!
  timezone_identifiers.each do |timezone_identifier|
    load_timezone_info(timezone_identifier)
  end

  country_codes.each do |country_code|
    load_country_info(country_code)
  end

  nil
end
# Provides a human-readable label for the data source. The base class uses a
# fixed label.
#
# @return [String] a description of the {DataSource}.
def to_s
  'Default DataSource'
end
# Returns a short representation made up of the instance's class, so an
# instance of a subclass is shown under the subclass's own name.
#
# @return [String] the internal object state as a programmer-readable
#   `String`.
def inspect
"#<#{self.class}>"
end
protected
# Returns a {DataSources::TimezoneInfo} instance for the given time zone
# identifier. The result should derive from either
# {DataSources::DataTimezoneInfo} for time zones that define their own data
# or {DataSources::LinkedTimezoneInfo} for links to or aliases for other
# time zones.
#
# This base implementation only reports that the method has not been
# provided; data sources are expected to override it. It is called by
# {get_timezone_info} and {preload!}.
#
# @param identifier [String] A time zone identifier.
# @return [DataSources::TimezoneInfo] a {DataSources::TimezoneInfo} instance
#   for the given time zone identifier.
# @raise [InvalidTimezoneIdentifier] if the time zone is not found or the
#   identifier is invalid.
# @raise [InvalidDataSource] if the data source has not overridden this
#   method.
def load_timezone_info(identifier)
raise_invalid_data_source('load_timezone_info')
end
# Returns the country information for the given country code.
#
# This base implementation only reports that the method has not been
# provided; data sources are expected to override it. It is called by
# {get_country_info} and {preload!}.
#
# @param code [String] an ISO 3166-1 alpha-2 country code.
# @return [DataSources::CountryInfo] a {DataSources::CountryInfo} instance
#   for the given ISO 3166-1 alpha-2 country code.
# @raise [InvalidCountryCode] if the country could not be found or the code
#   is invalid.
# @raise [InvalidDataSource] if the data source has not overridden this
#   method.
def load_country_info(code)
raise_invalid_data_source('load_country_info')
end
# Reports the encoding of the time zone identifiers. The base implementation
# returns UTF-8. {validate_timezone_identifier} passes this value to
# {try_with_encoding} as the encoding to retry a lookup with.
#
# @return [Encoding] the `Encoding` used by the `String` instances returned
#   by {data_timezone_identifiers} and {linked_timezone_identifiers}.
def timezone_identifier_encoding
Encoding::UTF_8
end
# Checks that the given identifier is a valid time zone identifier (can be
# found in the {timezone_identifiers} `Array`). If the identifier is valid,
# the `String` instance representing that identifier from
# `timezone_identifiers` is returned. Otherwise an
# {InvalidTimezoneIdentifier} exception is raised.
#
# The lookup is attempted with `identifier` as given and, if that fails,
# with a copy converted to {timezone_identifier_encoding}. An identifier that
# cannot be converted is treated as not found rather than allowing the
# `EncodingError` to escape.
#
# @param identifier [String] a time zone identifier to be validated.
# @return [String] the `String` instance equivalent to `identifier` from
#   {timezone_identifiers}.
# @raise [InvalidTimezoneIdentifier] if `identifier` is not a `String` or
#   was not found in {timezone_identifiers}.
def validate_timezone_identifier(identifier)
  unless identifier.kind_of?(String)
    raise InvalidTimezoneIdentifier, "Invalid identifier: #{identifier.nil? ? 'nil' : identifier}"
  end

  valid_identifier =
    begin
      try_with_encoding(identifier, timezone_identifier_encoding) {|id| find_timezone_identifier(id) }
    rescue EncodingError
      # The identifier could not be transcoded (e.g. binary data), so it
      # cannot match any known identifier.
      nil
    end
  return valid_identifier if valid_identifier

  # Build the message defensively: a plain String#encode raises for bytes
  # that are invalid or have no UTF-8 equivalent, which would hide the
  # InvalidTimezoneIdentifier that callers expect.
  printable =
    begin
      identifier.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
    rescue EncodingError
      # No converter exists for the identifier's encoding at all.
      identifier.inspect
    end

  raise InvalidTimezoneIdentifier, "Invalid identifier: #{printable}"
end
# Looks up a given code in the given hash of code to
# {DataSources::CountryInfo} mappings. If the code is found the
# {DataSources::CountryInfo} is returned. Otherwise an {InvalidCountryCode}
# exception is raised.
#
# The lookup is attempted with `code` as given and, if that fails, with a
# copy converted to `encoding`. A code that cannot be converted is treated as
# not found rather than allowing the `EncodingError` to escape.
#
# @param hash [Hash<String, DataSources::CountryInfo>] a mapping from ISO
#   3166-1 alpha-2 country codes to {DataSources::CountryInfo} instances.
# @param code [String] a country code to lookup.
# @param encoding [Encoding] the encoding used for the country codes in
#   `hash`.
# @return [DataSources::CountryInfo] the {DataSources::CountryInfo} instance
#   corresponding to `code`.
# @raise [InvalidCountryCode] if `code` is not a `String` or was not found
#   in `hash`.
def lookup_country_info(hash, code, encoding = Encoding::UTF_8)
  unless code.kind_of?(String)
    raise InvalidCountryCode, "Invalid country code: #{code.nil? ? 'nil' : code}"
  end

  info =
    begin
      try_with_encoding(code, encoding) {|c| hash[c] }
    rescue EncodingError
      # The code could not be transcoded (e.g. binary data), so it cannot
      # match any key in the hash.
      nil
    end
  return info if info

  # Build the message defensively: a plain String#encode raises for bytes
  # that are invalid or have no UTF-8 equivalent, which would hide the
  # InvalidCountryCode that callers expect.
  printable =
    begin
      code.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
    rescue EncodingError
      # No converter exists for the code's encoding at all.
      code.inspect
    end

  raise InvalidCountryCode, "Invalid country code: #{printable}"
end
private
# Raises {InvalidDataSource} to indicate that a method has not been
# overridden by a particular data source implementation.
#
# @param method_name [String] the name of the method that has not been
#   overridden. It is included in the exception message.
# @raise [InvalidDataSource] always.
def raise_invalid_data_source(method_name)
raise InvalidDataSource, "#{method_name} not defined"
end
# Produces the complete list of valid time zone identifiers by merging
# {data_timezone_identifiers} with {linked_timezone_identifiers}. When there
# are no linked identifiers, the {data_timezone_identifiers} instance itself
# is returned and no new `Array` is created.
#
# The returned `Array` is frozen. The identifiers are sorted according to
# `String#<=>`.
#
# @return [Array<String>] an `Array` containing all valid time zone
#   identifiers.
def build_timezone_identifiers
  data_identifiers = data_timezone_identifiers
  linked_identifiers = linked_timezone_identifiers
  return data_identifiers if linked_identifiers.empty?

  # `+` creates a new Array, so sorting in place leaves both inputs alone.
  combined = data_identifiers + linked_identifiers
  combined.sort!
  combined.freeze
end
if [].respond_to?(:bsearch)
  # Searches {timezone_identifiers} for `identifier` using `Array#bsearch`.
  # When a matching entry exists, the `String` instance held in
  # {timezone_identifiers} is returned (not the `identifier` argument).
  # Otherwise, `nil` is returned.
  #
  # @param identifier [String] A time zone identifier to search for.
  # @return [String] the `String` instance representing `identifier` from
  #   {timezone_identifiers} if found, or `nil` if not found.
  #
  # :nocov_no_array_bsearch:
  def find_timezone_identifier(identifier)
    # Find the first entry that is not less than identifier, then accept it
    # only if it is an exact match.
    candidate = timezone_identifiers.bsearch {|known| known >= identifier }
    candidate if candidate == identifier
  end
  # :nocov_no_array_bsearch:
else
  # Searches {timezone_identifiers} for `identifier` with a hand-written
  # binary search (used when `Array#bsearch` is not available). When a
  # matching entry exists, the `String` instance held in
  # {timezone_identifiers} is returned (not the `identifier` argument).
  # Otherwise, `nil` is returned.
  #
  # @param identifier [String] A time zone identifier to search for.
  # @return [String] the `String` instance representing `identifier` from
  #   {timezone_identifiers} if found, or `nil` if not found.
  #
  # :nocov_array_bsearch:
  def find_timezone_identifier(identifier)
    sorted = timezone_identifiers
    lower = 0
    upper = sorted.length

    # Invariant: any match lies at an index in lower...upper.
    until lower >= upper
      middle = (lower + upper) / 2
      candidate = sorted[middle]
      order = candidate <=> identifier
      return candidate if order == 0

      if order > 0
        upper = middle
      else
        lower = middle + 1
      end
    end

    nil
  end
  # :nocov_array_bsearch:
end
# Attempts an operation with `string` as supplied. If that attempt does not
# produce a truthy result and `string` is not already encoded with
# `encoding`, a copy of the string encoded with `encoding` is created and
# the operation is attempted a second time.
#
# @param string [String] The `String` to perform the operation on.
# @param encoding [Encoding] The `Encoding` to use if the initial attempt
#   fails.
# @yield [s] the caller will be yielded to once or twice to attempt the
#   operation.
# @yieldparam s [String] either `string` or an encoded copy of `string`.
# @yieldreturn [Object] The result of the operation. Must be truthy if
#   successful.
# @return [Object] the result of the operation or `nil` if the first attempt
#   fails and `string` is already encoded with `encoding`.
def try_with_encoding(string, encoding)
  first_attempt = yield string
  return first_attempt if first_attempt

  # Re-encoding would produce an equivalent string, so there is nothing
  # further to try.
  return nil if encoding == string.encoding

  yield string.encode(encoding)
end
end
end