astro-ghostcms/.pnpm-store/v3/files/73/dc201b05929edde152118db9682...

681 lines
18 KiB
Plaintext

// The `name` naming table.
// https://www.microsoft.com/typography/OTSPEC/name.htm
import { decode } from '../types';
import parse from '../parse';
// Property names for the predefined name IDs of the `name` table.
// The array index is the numeric nameID. parseNameTable uses the string found
// here as the property key of its result and falls back to the raw numeric
// nameID when there is no entry for it.
const nameTableNames = [
'copyright', // 0
'fontFamily', // 1
'fontSubfamily', // 2
'uniqueID', // 3
'fullName', // 4
'version', // 5
'postScriptName', // 6
'trademark', // 7
'manufacturer', // 8
'designer', // 9
'description', // 10
'manufacturerURL', // 11
'designerURL', // 12
'license', // 13
'licenseURL', // 14
'reserved', // 15
'preferredFamily', // 16
'preferredSubfamily', // 17
'compatibleFullName', // 18
'sampleText', // 19
'postScriptFindFontName', // 20
'wwsFamily', // 21
'wwsSubfamily', // 22
];
// MacOS language ID → IETF BCP 47 language code.
// Looked up by getLanguageCode for name records of platform 1 (Macintosh).
// IDs 95-127 are not assigned in this table.
const macLanguages = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'et', // langEstonian (was wrongly 'es', which is Spanish)
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cs', // langCzech ('cz' is not a language code)
    39: 'sk',
    40: 'sl', // langSlovenian (was wrongly 'si', which is Sinhala, see 76)
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ku', // langKurdish (was swapped with 61)
    61: 'ks', // langKashmiri (was swapped with 60)
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
};
// MacOS language ID → MacOS script ID
//
// Note that the script ID is not sufficient to determine what encoding
// to use in TrueType files. For some languages, MacOS used a modification
// of a mainstream script. For example, an Icelandic name would be stored
// with smRoman in the TrueType naming table, but the actual encoding
// is a special Icelandic version of the normal Macintosh Roman encoding.
// As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
// Syllables but MacOS had run out of available script codes, so this was
// done as a (pretty radical) "modification" of Ethiopic.
//
// Entries marked "(modified)" below are the languages that use such a
// variant of the listed script.
//
// http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
const macLanguageToScript = {
0: 0, // langEnglish → smRoman
1: 0, // langFrench → smRoman
2: 0, // langGerman → smRoman
3: 0, // langItalian → smRoman
4: 0, // langDutch → smRoman
5: 0, // langSwedish → smRoman
6: 0, // langSpanish → smRoman
7: 0, // langDanish → smRoman
8: 0, // langPortuguese → smRoman
9: 0, // langNorwegian → smRoman
10: 5, // langHebrew → smHebrew
11: 1, // langJapanese → smJapanese
12: 4, // langArabic → smArabic
13: 0, // langFinnish → smRoman
14: 6, // langGreek → smGreek
15: 0, // langIcelandic → smRoman (modified)
16: 0, // langMaltese → smRoman
17: 0, // langTurkish → smRoman (modified)
18: 0, // langCroatian → smRoman (modified)
19: 2, // langTradChinese → smTradChinese
20: 4, // langUrdu → smArabic
21: 9, // langHindi → smDevanagari
22: 21, // langThai → smThai
23: 3, // langKorean → smKorean
24: 29, // langLithuanian → smCentralEuroRoman
25: 29, // langPolish → smCentralEuroRoman
26: 29, // langHungarian → smCentralEuroRoman
27: 29, // langEstonian → smCentralEuroRoman
28: 29, // langLatvian → smCentralEuroRoman
29: 0, // langSami → smRoman
30: 0, // langFaroese → smRoman (modified)
31: 4, // langFarsi → smArabic (modified)
32: 7, // langRussian → smCyrillic
33: 25, // langSimpChinese → smSimpChinese
34: 0, // langFlemish → smRoman
35: 0, // langIrishGaelic → smRoman (modified)
36: 0, // langAlbanian → smRoman
37: 0, // langRomanian → smRoman (modified)
38: 29, // langCzech → smCentralEuroRoman
39: 29, // langSlovak → smCentralEuroRoman
40: 0, // langSlovenian → smRoman (modified)
41: 5, // langYiddish → smHebrew
42: 7, // langSerbian → smCyrillic
43: 7, // langMacedonian → smCyrillic
44: 7, // langBulgarian → smCyrillic
45: 7, // langUkrainian → smCyrillic (modified)
46: 7, // langByelorussian → smCyrillic
47: 7, // langUzbek → smCyrillic
48: 7, // langKazakh → smCyrillic
49: 7, // langAzerbaijani → smCyrillic
50: 4, // langAzerbaijanAr → smArabic
51: 24, // langArmenian → smArmenian
52: 23, // langGeorgian → smGeorgian
53: 7, // langMoldavian → smCyrillic
54: 7, // langKirghiz → smCyrillic
55: 7, // langTajiki → smCyrillic
56: 7, // langTurkmen → smCyrillic
57: 27, // langMongolian → smMongolian
58: 7, // langMongolianCyr → smCyrillic
59: 4, // langPashto → smArabic
60: 4, // langKurdish → smArabic
61: 4, // langKashmiri → smArabic
62: 4, // langSindhi → smArabic
63: 26, // langTibetan → smTibetan
64: 9, // langNepali → smDevanagari
65: 9, // langSanskrit → smDevanagari
66: 9, // langMarathi → smDevanagari
67: 13, // langBengali → smBengali
68: 13, // langAssamese → smBengali
69: 11, // langGujarati → smGujarati
70: 10, // langPunjabi → smGurmukhi
71: 12, // langOriya → smOriya
72: 17, // langMalayalam → smMalayalam
73: 16, // langKannada → smKannada
74: 14, // langTamil → smTamil
75: 15, // langTelugu → smTelugu
76: 18, // langSinhalese → smSinhalese
77: 19, // langBurmese → smBurmese
78: 20, // langKhmer → smKhmer
79: 22, // langLao → smLao
80: 30, // langVietnamese → smVietnamese
81: 0, // langIndonesian → smRoman
82: 0, // langTagalog → smRoman
83: 0, // langMalayRoman → smRoman
84: 4, // langMalayArabic → smArabic
85: 28, // langAmharic → smEthiopic
86: 28, // langTigrinya → smEthiopic
87: 28, // langOromo → smEthiopic
88: 0, // langSomali → smRoman
89: 0, // langSwahili → smRoman
90: 0, // langKinyarwanda → smRoman
91: 0, // langRundi → smRoman
92: 0, // langNyanja → smRoman
93: 0, // langMalagasy → smRoman
94: 0, // langEsperanto → smRoman
128: 0, // langWelsh → smRoman (modified)
129: 0, // langBasque → smRoman
130: 0, // langCatalan → smRoman
131: 0, // langLatin → smRoman
132: 0, // langQuechua → smRoman
133: 0, // langGuarani → smRoman
134: 0, // langAymara → smRoman
135: 7, // langTatar → smCyrillic
136: 4, // langUighur → smArabic
137: 26, // langDzongkha → smTibetan
138: 0, // langJavaneseRom → smRoman
139: 0, // langSundaneseRom → smRoman
140: 0, // langGalician → smRoman
141: 0, // langAfrikaans → smRoman
142: 0, // langBreton → smRoman (modified)
143: 28, // langInuktitut → smEthiopic (modified)
144: 0, // langScottishGaelic → smRoman (modified)
145: 0, // langManxGaelic → smRoman (modified)
146: 0, // langIrishGaelicScript → smRoman (modified)
147: 0, // langTongan → smRoman
148: 6, // langGreekAncient → smGreek
149: 0, // langGreenlandic → smRoman
150: 0, // langAzerbaijanRoman → smRoman
151: 0, // langNynorsk → smRoman
};
// Windows language ID (LCID) → IETF BCP 47 language code.
// Looked up by getLanguageCode for name records of platform 3 (Windows).
//
// While Microsoft indicates a region/country for all its language
// IDs, we omit the region code if it's equal to the "most likely
// region subtag" according to Unicode CLDR. For scripts, we omit
// the subtag if it is equal to the Suppress-Script entry in the
// IANA language subtag registry for IETF BCP 47.
//
// For example, Microsoft states that its language code 0x041A is
// Croatian in Croatia. We transform this to the BCP 47 language code 'hr'
// and not 'hr-HR' because Croatia is the default country for Croatian,
// according to Unicode CLDR. As another example, Microsoft states
// that 0x101A is Croatian (Latin) in Bosnia-Herzegovina. We transform
// this to 'hr-BA' and not 'hr-Latn-BA' because Latin is the default script
// for the Croatian language, according to IANA.
//
// http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
// http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
const windowsLanguages = {
    0x0436: 'af',
    0x041c: 'sq',
    0x0484: 'gsw',
    0x045e: 'am',
    0x1401: 'ar-DZ',
    0x3c01: 'ar-BH',
    0x0c01: 'ar',
    0x0801: 'ar-IQ',
    0x2c01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1c01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042b: 'hy',
    0x044d: 'as',
    0x082c: 'az-Cyrl',
    0x042c: 'az',
    0x046d: 'ba',
    0x042d: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201a: 'bs-Cyrl',
    0x141a: 'bs',
    0x047e: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0c04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041a: 'hr',
    0x101a: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048c: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0c09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1c09: 'en-ZA',
    0x2c09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040b: 'fi',
    0x080c: 'fr-BE',
    0x0c0c: 'fr-CA',
    0x040c: 'fr',
    0x140c: 'fr-LU',
    0x180c: 'fr-MC',
    0x100c: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0c07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046f: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040d: 'he',
    0x0439: 'hi',
    0x040e: 'hu',
    0x040f: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045d: 'iu',
    0x085d: 'iu-Latn',
    0x083c: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044b: 'kn',
    0x043f: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082e: 'dsb',
    0x046e: 'lb',
    0x042f: 'mk',
    0x083e: 'ms-BN',
    0x043e: 'ms',
    0x044c: 'ml',
    0x043a: 'mt',
    0x0481: 'mi',
    0x047a: 'arn',
    0x044e: 'mr',
    0x047c: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046b: 'qu-BO',
    0x086b: 'qu-EC',
    0x0c6b: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243b: 'smn',
    0x103b: 'smj-NO',
    0x143b: 'smj',
    0x0c3b: 'se-FI',
    0x043b: 'se',
    0x083b: 'se-SE',
    0x203b: 'sms',
    0x183b: 'sma-NO',
    // Sami (Southern), Sweden. This used to duplicate 'sms' (Skolt Sami),
    // which is already assigned to 0x203b above.
    0x1c3b: 'sma',
    0x044f: 'sa',
    0x1c1a: 'sr-Cyrl-BA',
    0x0c1a: 'sr',
    0x181a: 'sr-Latn-BA',
    0x081a: 'sr-Latn',
    0x046c: 'nso',
    0x0432: 'tn',
    0x045b: 'si',
    0x041b: 'sk',
    0x0424: 'sl',
    0x2c0a: 'es-AR',
    0x400a: 'es-BO',
    0x340a: 'es-CL',
    0x240a: 'es-CO',
    0x140a: 'es-CR',
    0x1c0a: 'es-DO',
    0x300a: 'es-EC',
    0x440a: 'es-SV',
    0x100a: 'es-GT',
    0x480a: 'es-HN',
    0x080a: 'es-MX',
    0x4c0a: 'es-NI',
    0x180a: 'es-PA',
    0x3c0a: 'es-PY',
    0x280a: 'es-PE',
    0x500a: 'es-PR',
    // Microsoft has defined two different language codes for
    // “Spanish with modern sorting” and “Spanish with traditional
    // sorting”. This makes sense for collation APIs, and it would be
    // possible to express this in BCP 47 language tags via Unicode
    // extensions (eg., es-u-co-trad is Spanish with traditional
    // sorting). However, for storing names in fonts, the distinction
    // does not make sense, so we give “es” in both cases.
    0x0c0a: 'es',
    0x040a: 'es',
    0x540a: 'es-US',
    0x380a: 'es-UY',
    0x200a: 'es-VE',
    0x081d: 'sv-FI',
    0x041d: 'sv',
    0x045a: 'syr',
    0x0428: 'tg',
    0x085f: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044a: 'te',
    0x041e: 'th',
    0x0451: 'bo',
    0x041f: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042e: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042a: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046a: 'yo',
};
/**
 * Returns an IETF BCP 47 language code for a name record, for example
 * 'zh-Hant' for 'Chinese in the traditional script'.
 *
 * @param {number} platformID - 0 (Unicode), 1 (Macintosh) or 3 (Windows).
 * @param {number} languageID - platform-specific language ID of the record.
 * @param {string[]} [ltag] - language tags, indexed by languageID on the
 *     Unicode platform.
 * @returns {string|undefined} the language code, or undefined when the
 *     platform or the language ID is not known.
 */
function getLanguageCode(platformID, languageID, ltag) {
    if (platformID === 1) {
        // Macintosh
        return macLanguages[languageID];
    }
    if (platformID === 3) {
        // Windows
        return windowsLanguages[languageID];
    }
    if (platformID !== 0) {
        return undefined;
    }
    // Unicode: 0xffff marks a record without a specific language.
    if (languageID === 0xffff) {
        return 'und';
    }
    return ltag ? ltag[languageID] : undefined;
}
// Encoding label that getEncoding returns for UTF-16 encoded strings.
// parseNameTable compares against it to choose decode.UTF16 over
// decode.MACSTRING.
const utf16 = 'utf-16';
// MacOS script ID → encoding. This table stores the default case,
// which can be overridden by macLanguageEncodings.
// Script IDs 4, 5 and 8 have no entry, so getEncoding yields undefined for
// them unless the language has its own entry in macLanguageEncodings.
const macScriptEncodings = {
    0: 'macintosh', // smRoman
    1: 'x-mac-japanese', // smJapanese
    2: 'x-mac-chinesetrad', // smTradChinese
    3: 'x-mac-korean', // smKorean
    6: 'x-mac-greek', // smGreek
    7: 'x-mac-cyrillic', // smCyrillic
    9: 'x-mac-devanagari', // smDevanagari (label was misspelled 'x-mac-devanagai')
    10: 'x-mac-gurmukhi', // smGurmukhi
    11: 'x-mac-gujarati', // smGujarati
    12: 'x-mac-oriya', // smOriya
    13: 'x-mac-bengali', // smBengali
    14: 'x-mac-tamil', // smTamil
    15: 'x-mac-telugu', // smTelugu
    16: 'x-mac-kannada', // smKannada
    17: 'x-mac-malayalam', // smMalayalam
    18: 'x-mac-sinhalese', // smSinhalese
    19: 'x-mac-burmese', // smBurmese
    20: 'x-mac-khmer', // smKhmer
    21: 'x-mac-thai', // smThai
    22: 'x-mac-lao', // smLao
    23: 'x-mac-georgian', // smGeorgian
    24: 'x-mac-armenian', // smArmenian
    25: 'x-mac-chinesesimp', // smSimpChinese
    26: 'x-mac-tibetan', // smTibetan
    27: 'x-mac-mongolian', // smMongolian
    28: 'x-mac-ethiopic', // smEthiopic
    29: 'x-mac-ce', // smCentralEuroRoman
    30: 'x-mac-vietnamese', // smVietnamese
    31: 'x-mac-extarabic', // smExtArabic
};
// MacOS language ID → encoding. This table stores the exceptional
// cases, which override macScriptEncodings. For writing MacOS naming
// tables, we need to emit a MacOS script ID. Therefore, we cannot
// merge macScriptEncodings into macLanguageEncodings.
//
// getEncoding consults this table first and only falls back to
// macScriptEncodings when the language has no entry here.
//
// http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
const macLanguageEncodings = {
15: 'x-mac-icelandic', // langIcelandic
17: 'x-mac-turkish', // langTurkish
18: 'x-mac-croatian', // langCroatian
24: 'x-mac-ce', // langLithuanian
25: 'x-mac-ce', // langPolish
26: 'x-mac-ce', // langHungarian
27: 'x-mac-ce', // langEstonian
28: 'x-mac-ce', // langLatvian
30: 'x-mac-icelandic', // langFaroese
37: 'x-mac-romanian', // langRomanian
38: 'x-mac-ce', // langCzech
39: 'x-mac-ce', // langSlovak
40: 'x-mac-ce', // langSlovenian
143: 'x-mac-inuit', // langInuktitut
146: 'x-mac-gaelic', // langIrishGaelicScript
};
/**
 * Determine the string encoding of a name record.
 *
 * @param {number} platformID - 0 (Unicode), 1 (Macintosh) or 3 (Windows).
 * @param {number} encodingID - platform-specific encoding ID; on the
 *     Macintosh platform it is used as the key into macScriptEncodings.
 * @param {number} languageID - platform-specific language ID; on the
 *     Macintosh platform it can override the script's default encoding.
 * @returns {string|undefined} an encoding label, or undefined when the
 *     combination is not supported.
 */
function getEncoding(platformID, encodingID, languageID) {
    // Unicode platform: always UTF-16.
    if (platformID === 0) {
        return utf16;
    }
    // Apple Macintosh: a per-language exception wins over the script default.
    if (platformID === 1) {
        const languageOverride = macLanguageEncodings[languageID];
        return languageOverride || macScriptEncodings[encodingID];
    }
    // Microsoft Windows: only encoding IDs 1 and 10 are handled.
    const isWindowsUnicode =
        platformID === 3 && (encodingID === 1 || encodingID === 10);
    return isWindowsUnicode ? utf16 : undefined;
}
/**
 * Parse the naming `name` table.
 *
 * Each name record whose platform/encoding/language combination is
 * recognised by getEncoding and getLanguageCode is decoded and stored as
 * `result[property][language] = text`, where `property` comes from
 * nameTableNames (or is the numeric nameID when there is no entry for it).
 * When several records resolve to the same property and language, the
 * record that comes last in the table wins.
 *
 * FIXME: Format 1 additional fields (Microsoft's language-tag records) are
 * not supported yet; only the name records are read.
 *
 * @param {DataView} data - binary font data as accepted by parse.Parser and
 *     the decode helpers (presumably a DataView; confirm against those modules).
 * @param {number} start - offset of the `name` table within `data`.
 * @param {string[]} [ltag] - content of the `ltag` table, such as
 *     ['en', 'zh-Hans', 'de-CH-1904'].
 * @returns {Object} decoded names, keyed by name property, then by language.
 */
function parseNameTable(data, start, ltag) {
    const name = {};
    const p = new parse.Parser(data, start);
    // Table format. The value itself is not needed because format 1 fields
    // are not interpreted; the read only advances the parser.
    p.parseUShort();
    const count = p.parseUShort();
    // Start of the string storage area.
    const stringOffset = p.offset + p.parseUShort();
    for (let i = 0; i < count; i++) {
        const platformID = p.parseUShort();
        const encodingID = p.parseUShort();
        const languageID = p.parseUShort();
        const nameID = p.parseUShort();
        const byteLength = p.parseUShort();
        const offset = p.parseUShort();
        const language = getLanguageCode(platformID, languageID, ltag);
        const encoding = getEncoding(platformID, encodingID, languageID);
        // Skip records we cannot attribute to a language or cannot decode.
        if (encoding === undefined || language === undefined) {
            continue;
        }
        const text =
            encoding === utf16
                ? decode.UTF16(data, stringOffset + offset, byteLength)
                : decode.MACSTRING(
                      data,
                      stringOffset + offset,
                      byteLength,
                      encoding
                  );
        // Empty or undecodable strings are not recorded.
        if (!text) {
            continue;
        }
        const property = nameTableNames[nameID] || nameID;
        if (name[property] === undefined) {
            name[property] = {};
        }
        name[property][language] = text;
    }
    return name;
}
// Module interface: `parse` is parseNameTable, which reads a `name` table
// into an object keyed by name property and then by language.
export default { parse: parseNameTable };