-
Notifications
You must be signed in to change notification settings - Fork 57
/
ietf-lanGen.php
48 lines (40 loc) · 1.82 KB
/
ietf-lanGen.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
<?php
/**
* Extracts all lang codes and basic metadata.
* @version 2015-08-31
* @usage php ietf-lanGen.php
*        (the script itself writes data/ietf-language-tags.csv; nothing is printed, so no output redirection is needed)
*/
//CONFIGS:
// Directory scanned for locale XML files ("common/main" obtained from
// http://www.unicode.org/Public/cldr/latest/core.zip).
$dir = 'main';
// JSON document whose "defaultContent" entry lists the default-content locales;
// see https://github.com/unicode-cldr/cldr-core
$cldrCore_js = 'https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/defaultContent.json';
// CSV file produced by this script (path is relative to the working directory).
$csvPath = 'data/ietf-language-tags.csv';

# Validate every input BEFORE touching the output file, so that a failed
# download or a missing directory does not leave a truncated CSV behind.
$cldrCoreRaw = file_get_contents($cldrCore_js);
if ($cldrCoreRaw === false) {
    die("Unable to download $cldrCore_js");
}
$cldrCore = json_decode($cldrCoreRaw, TRUE);
if (!is_array($cldrCore) || !isset($cldrCore['defaultContent']) || !is_array($cldrCore['defaultContent'])) {
    die("Unexpected content in $cldrCore_js");
}

# Lower-cased list of default-content locales. Separators are normalised to '-'
# here and on the lookup side below, so the membership test works whether the
# upstream list spells locales with '_' or with '-'.
$cldrCoreDft = array();
foreach ($cldrCore['defaultContent'] as $dftLocale) {
    $cldrCoreDft[] = strtolower(strtr($dftLocale, '_', '-'));
}

$files = scandir($dir);
if ($files === false) {
    die("Unable to read directory $dir");
}

# create ietf-language-tags.csv and assign headers
# (opened once for the whole run instead of once per locale file)
$myfile = fopen($csvPath, "w") or die("Unable to open file!");
fwrite($myfile, "lang,langType,territory,revGenDate,defs,dftLang,file");

$dom = new DOMDocument;
$n = 0; // number of rows written
foreach ($files as $file) {
    // Only "<name>.xml" entries are locale files; <name> is captured in $m[1].
    if (!preg_match('/^(.+)\.xml$/', $file, $m)) {
        continue;
    }
    // load() reports parse problems itself; skip the file rather than export
    // a row built from a document that failed to load.
    if (!$dom->load("$dir/$file")) {
        continue;
    }
    $xp = new DOMXpath($dom);

    $revGenDate = $xp->evaluate("string(/ldml/identity/generation/@date)");
    $langType   = $xp->evaluate("string(/ldml/identity/language/@type)");
    $territory  = $xp->evaluate("string(/ldml/identity/territory/@type)");
    // Number of direct children of <ldml> other than <identity>.
    $defs       = $xp->evaluate("count(/ldml/*[not(self::identity)])");

    // Keep only the digits-digits-digits part when the attribute wraps the date
    // in other text; values that do not match the pattern are left unchanged.
    $revGenDate = preg_replace('/^[^\d]+(\d+\-\d+\-\d+).+$/', '$1', $revGenDate);

    // File stems use '_' as separator; the exported tag uses '-'.
    $lang = strtr($m[1], '_', '-');
    $isDftLang = in_array(strtolower($lang), $cldrCoreDft, true) ? '1' : '0'; // yes or not

    # append one data row to ietf-language-tags.csv
    # (rows start with "\n" so the file ends without a trailing newline)
    fwrite($myfile, "\n$lang,$langType,$territory,$revGenDate,$defs,$isDftLang,$file");
    $n++;
}
fclose($myfile);
?>