// The `name` naming table.
// https://www.microsoft.com/typography/OTSPEC/name.htm

import { decode } from '../types';
import parse from '../parse';

// NameIDs for the name table.
const nameTableNames = [
    'copyright', // 0
    'fontFamily', // 1
    'fontSubfamily', // 2
    'uniqueID', // 3
    'fullName', // 4
    'version', // 5
    'postScriptName', // 6
    'trademark', // 7
    'manufacturer', // 8
    'designer', // 9
    'description', // 10
    'manufacturerURL', // 11
    'designerURL', // 12
    'license', // 13
    'licenseURL', // 14
    'reserved', // 15
    'preferredFamily', // 16
    'preferredSubfamily', // 17
    'compatibleFullName', // 18
    'sampleText', // 19
    'postScriptFindFontName', // 20
    'wwsFamily', // 21
    'wwsSubfamily', // 22
];

const macLanguages = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'es',
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cz',
    39: 'sk',
    40: 'si',
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ks',
    61: 'ku',
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
};

// MacOS language ID → MacOS script ID
//
// Note that the script ID is not sufficient to determine what encoding
// to use in TrueType files. For some languages, MacOS used a modification
// of a mainstream script. For example, an Icelandic name would be stored
// with smRoman in the TrueType naming table, but the actual encoding
// is a special Icelandic version of the normal Macintosh Roman encoding.
// As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
// Syllables but MacOS had run out of available script codes, so this was
// done as a (pretty radical) "modification" of Ethiopic.
//
// http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
const macLanguageToScript = {
    0: 0, // langEnglish → smRoman
    1: 0, // langFrench → smRoman
    2: 0, // langGerman → smRoman
    3: 0, // langItalian → smRoman
    4: 0, // langDutch → smRoman
    5: 0, // langSwedish → smRoman
    6: 0, // langSpanish → smRoman
    7: 0, // langDanish → smRoman
    8: 0, // langPortuguese → smRoman
    9: 0, // langNorwegian → smRoman
    10: 5, // langHebrew → smHebrew
    11: 1, // langJapanese → smJapanese
    12: 4, // langArabic → smArabic
    13: 0, // langFinnish → smRoman
    14: 6, // langGreek → smGreek
    15: 0, // langIcelandic → smRoman (modified)
    16: 0, // langMaltese → smRoman
    17: 0, // langTurkish → smRoman (modified)
    18: 0, // langCroatian → smRoman (modified)
    19: 2, // langTradChinese → smTradChinese
    20: 4, // langUrdu → smArabic
    21: 9, // langHindi → smDevanagari
    22: 21, // langThai → smThai
    23: 3, // langKorean → smKorean
    24: 29, // langLithuanian → smCentralEuroRoman
    25: 29, // langPolish → smCentralEuroRoman
    26: 29, // langHungarian → smCentralEuroRoman
    27: 29, // langEstonian → smCentralEuroRoman
    28: 29, // langLatvian → smCentralEuroRoman
    29: 0, // langSami → smRoman
    30: 0, // langFaroese → smRoman (modified)
    31: 4, // langFarsi → smArabic (modified)
    32: 7, // langRussian → smCyrillic
    33: 25, // langSimpChinese → smSimpChinese
    34: 0, // langFlemish → smRoman
    35: 0, // langIrishGaelic → smRoman (modified)
    36: 0, // langAlbanian → smRoman
    37: 0, // langRomanian → smRoman (modified)
    38: 29, // langCzech → smCentralEuroRoman
    39: 29, // langSlovak → smCentralEuroRoman
    40: 0, // langSlovenian → smRoman (modified)
    41: 5, // langYiddish → smHebrew
    42: 7, // langSerbian → smCyrillic
    43: 7, // langMacedonian → smCyrillic
    44: 7, // langBulgarian → smCyrillic
    45: 7, // langUkrainian → smCyrillic (modified)
    46: 7, // langByelorussian → smCyrillic
    47: 7, // langUzbek → smCyrillic
    48: 7, // langKazakh → smCyrillic
    49: 7, // langAzerbaijani → smCyrillic
    50: 4, // langAzerbaijanAr → smArabic
    51: 24, // langArmenian → smArmenian
    52: 23, // langGeorgian → smGeorgian
    53: 7, // langMoldavian → smCyrillic
    54: 7, // langKirghiz → smCyrillic
    55: 7, // langTajiki → smCyrillic
    56: 7, // langTurkmen → smCyrillic
    57: 27, // langMongolian → smMongolian
    58: 7, // langMongolianCyr → smCyrillic
    59: 4, // langPashto → smArabic
    60: 4, // langKurdish → smArabic
    61: 4, // langKashmiri → smArabic
    62: 4, // langSindhi → smArabic
    63: 26, // langTibetan → smTibetan
    64: 9, // langNepali → smDevanagari
    65: 9, // langSanskrit → smDevanagari
    66: 9, // langMarathi → smDevanagari
    67: 13, // langBengali → smBengali
    68: 13, // langAssamese → smBengali
    69: 11, // langGujarati → smGujarati
    70: 10, // langPunjabi → smGurmukhi
    71: 12, // langOriya → smOriya
    72: 17, // langMalayalam → smMalayalam
    73: 16, // langKannada → smKannada
    74: 14, // langTamil → smTamil
    75: 15, // langTelugu → smTelugu
    76: 18, // langSinhalese → smSinhalese
    77: 19, // langBurmese → smBurmese
    78: 20, // langKhmer → smKhmer
    79: 22, // langLao → smLao
    80: 30, // langVietnamese → smVietnamese
    81: 0, // langIndonesian → smRoman
    82: 0, // langTagalog → smRoman
    83: 0, // langMalayRoman → smRoman
    84: 4, // langMalayArabic → smArabic
    85: 28, // langAmharic → smEthiopic
    86: 28, // langTigrinya → smEthiopic
    87: 28, // langOromo → smEthiopic
    88: 0, // langSomali → smRoman
    89: 0, // langSwahili → smRoman
    90: 0, // langKinyarwanda → smRoman
    91: 0, // langRundi → smRoman
    92: 0, // langNyanja → smRoman
    93: 0, // langMalagasy → smRoman
    94: 0, // langEsperanto → smRoman
    128: 0, // langWelsh → smRoman (modified)
    129: 0, // langBasque → smRoman
    130: 0, // langCatalan → smRoman
    131: 0, // langLatin → smRoman
    132: 0, // langQuechua → smRoman
    133: 0, // langGuarani → smRoman
    134: 0, // langAymara → smRoman
    135: 7, // langTatar → smCyrillic
    136: 4, // langUighur → smArabic
    137: 26, // langDzongkha → smTibetan
    138: 0, // langJavaneseRom → smRoman
    139: 0, // langSundaneseRom → smRoman
    140: 0, // langGalician → smRoman
    141: 0, // langAfrikaans → smRoman
    142: 0, // langBreton → smRoman (modified)
    143: 28, // langInuktitut → smEthiopic (modified)
    144: 0, // langScottishGaelic → smRoman (modified)
    145: 0, // langManxGaelic → smRoman (modified)
    146: 0, // langIrishGaelicScript → smRoman (modified)
    147: 0, // langTongan → smRoman
    148: 6, // langGreekAncient → smRoman
    149: 0, // langGreenlandic → smRoman
    150: 0, // langAzerbaijanRoman → smRoman
    151: 0, // langNynorsk → smRoman
};

// While Microsoft indicates a region/country for all its language
// IDs, we omit the region code if it's equal to the "most likely
// region subtag" according to Unicode CLDR. For scripts, we omit
// the subtag if it is equal to the Suppress-Script entry in the
// IANA language subtag registry for IETF BCP 47.
//
// For example, Microsoft states that its language code 0x041A is
// Croatian in Croatia. We transform this to the BCP 47 language code 'hr'
// and not 'hr-HR' because Croatia is the default country for Croatian,
// according to Unicode CLDR. As another example, Microsoft states
// that 0x101A is Croatian (Latin) in Bosnia-Herzegovina. We transform
// this to 'hr-BA' and not 'hr-Latn-BA' because Latin is the default script
// for the Croatian language, according to IANA.
//
// http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
// http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
const windowsLanguages = {
    0x0436: 'af',
    0x041c: 'sq',
    0x0484: 'gsw',
    0x045e: 'am',
    0x1401: 'ar-DZ',
    0x3c01: 'ar-BH',
    0x0c01: 'ar',
    0x0801: 'ar-IQ',
    0x2c01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1c01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042b: 'hy',
    0x044d: 'as',
    0x082c: 'az-Cyrl',
    0x042c: 'az',
    0x046d: 'ba',
    0x042d: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201a: 'bs-Cyrl',
    0x141a: 'bs',
    0x047e: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0c04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041a: 'hr',
    0x101a: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048c: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0c09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1c09: 'en-ZA',
    0x2c09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040b: 'fi',
    0x080c: 'fr-BE',
    0x0c0c: 'fr-CA',
    0x040c: 'fr',
    0x140c: 'fr-LU',
    0x180c: 'fr-MC',
    0x100c: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0c07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046f: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040d: 'he',
    0x0439: 'hi',
    0x040e: 'hu',
    0x040f: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045d: 'iu',
    0x085d: 'iu-Latn',
    0x083c: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044b: 'kn',
    0x043f: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082e: 'dsb',
    0x046e: 'lb',
    0x042f: 'mk',
    0x083e: 'ms-BN',
    0x043e: 'ms',
    0x044c: 'ml',
    0x043a: 'mt',
    0x0481: 'mi',
    0x047a: 'arn',
    0x044e: 'mr',
    0x047c: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046b: 'qu-BO',
    0x086b: 'qu-EC',
    0x0c6b: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243b: 'smn',
    0x103b: 'smj-NO',
    0x143b: 'smj',
    0x0c3b: 'se-FI',
    0x043b: 'se',
    0x083b: 'se-SE',
    0x203b: 'sms',
    0x183b: 'sma-NO',
    0x1c3b: 'sms',
    0x044f: 'sa',
    0x1c1a: 'sr-Cyrl-BA',
    0x0c1a: 'sr',
    0x181a: 'sr-Latn-BA',
    0x081a: 'sr-Latn',
    0x046c: 'nso',
    0x0432: 'tn',
    0x045b: 'si',
    0x041b: 'sk',
    0x0424: 'sl',
    0x2c0a: 'es-AR',
    0x400a: 'es-BO',
    0x340a: 'es-CL',
    0x240a: 'es-CO',
    0x140a: 'es-CR',
    0x1c0a: 'es-DO',
    0x300a: 'es-EC',
    0x440a: 'es-SV',
    0x100a: 'es-GT',
    0x480a: 'es-HN',
    0x080a: 'es-MX',
    0x4c0a: 'es-NI',
    0x180a: 'es-PA',
    0x3c0a: 'es-PY',
    0x280a: 'es-PE',
    0x500a: 'es-PR',

    // Microsoft has defined two different language codes for
    // “Spanish with modern sorting” and “Spanish with traditional
    // sorting”. This makes sense for collation APIs, and it would be
    // possible to express this in BCP 47 language tags via Unicode
    // extensions (eg., es-u-co-trad is Spanish with traditional
    // sorting). However, for storing names in fonts, the distinction
    // does not make sense, so we give “es” in both cases.
    0x0c0a: 'es',
    0x040a: 'es',

    0x540a: 'es-US',
    0x380a: 'es-UY',
    0x200a: 'es-VE',
    0x081d: 'sv-FI',
    0x041d: 'sv',
    0x045a: 'syr',
    0x0428: 'tg',
    0x085f: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044a: 'te',
    0x041e: 'th',
    0x0451: 'bo',
    0x041f: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042e: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042a: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046a: 'yo',
};

// Returns a IETF BCP 47 language code, for example 'zh-Hant'
// for 'Chinese in the traditional script'.
function getLanguageCode(platformID, languageID, ltag) {
    switch (platformID) {
        case 0: // Unicode
            if (languageID === 0xffff) {
                return 'und';
            } else if (ltag) {
                return ltag[languageID];
            }

            break;

        case 1: // Macintosh
            return macLanguages[languageID];

        case 3: // Windows
            return windowsLanguages[languageID];
    }

    return undefined;
}

const utf16 = 'utf-16';

// MacOS script ID → encoding. This table stores the default case,
// which can be overridden by macLanguageEncodings.
const macScriptEncodings = {
    0: 'macintosh', // smRoman
    1: 'x-mac-japanese', // smJapanese
    2: 'x-mac-chinesetrad', // smTradChinese
    3: 'x-mac-korean', // smKorean
    6: 'x-mac-greek', // smGreek
    7: 'x-mac-cyrillic', // smCyrillic
    9: 'x-mac-devanagai', // smDevanagari
    10: 'x-mac-gurmukhi', // smGurmukhi
    11: 'x-mac-gujarati', // smGujarati
    12: 'x-mac-oriya', // smOriya
    13: 'x-mac-bengali', // smBengali
    14: 'x-mac-tamil', // smTamil
    15: 'x-mac-telugu', // smTelugu
    16: 'x-mac-kannada', // smKannada
    17: 'x-mac-malayalam', // smMalayalam
    18: 'x-mac-sinhalese', // smSinhalese
    19: 'x-mac-burmese', // smBurmese
    20: 'x-mac-khmer', // smKhmer
    21: 'x-mac-thai', // smThai
    22: 'x-mac-lao', // smLao
    23: 'x-mac-georgian', // smGeorgian
    24: 'x-mac-armenian', // smArmenian
    25: 'x-mac-chinesesimp', // smSimpChinese
    26: 'x-mac-tibetan', // smTibetan
    27: 'x-mac-mongolian', // smMongolian
    28: 'x-mac-ethiopic', // smEthiopic
    29: 'x-mac-ce', // smCentralEuroRoman
    30: 'x-mac-vietnamese', // smVietnamese
    31: 'x-mac-extarabic', // smExtArabic
};

// MacOS language ID → encoding. This table stores the exceptional
// cases, which override macScriptEncodings. For writing MacOS naming
// tables, we need to emit a MacOS script ID. Therefore, we cannot
// merge macScriptEncodings into macLanguageEncodings.
//
// http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
const macLanguageEncodings = {
    15: 'x-mac-icelandic', // langIcelandic
    17: 'x-mac-turkish', // langTurkish
    18: 'x-mac-croatian', // langCroatian
    24: 'x-mac-ce', // langLithuanian
    25: 'x-mac-ce', // langPolish
    26: 'x-mac-ce', // langHungarian
    27: 'x-mac-ce', // langEstonian
    28: 'x-mac-ce', // langLatvian
    30: 'x-mac-icelandic', // langFaroese
    37: 'x-mac-romanian', // langRomanian
    38: 'x-mac-ce', // langCzech
    39: 'x-mac-ce', // langSlovak
    40: 'x-mac-ce', // langSlovenian
    143: 'x-mac-inuit', // langInuktitut
    146: 'x-mac-gaelic', // langIrishGaelicScript
};

function getEncoding(platformID, encodingID, languageID) {
    switch (platformID) {
        case 0: // Unicode
            return utf16;

        case 1: // Apple Macintosh
            return (
                macLanguageEncodings[languageID] ||
                macScriptEncodings[encodingID]
            );

        case 3: // Microsoft Windows
            if (encodingID === 1 || encodingID === 10) {
                return utf16;
            }

            break;
    }

    return undefined;
}

// Parse the naming `name` table.
// FIXME: Format 1 additional fields are not supported yet.
// ltag is the content of the `ltag' table, such as ['en', 'zh-Hans', 'de-CH-1904'].
function parseNameTable(data, start, ltag) {
    const name = {};
    const p = new parse.Parser(data, start);
    const format = p.parseUShort();
    const count = p.parseUShort();
    const stringOffset = p.offset + p.parseUShort();
    for (let i = 0; i < count; i++) {
        const platformID = p.parseUShort();
        const encodingID = p.parseUShort();
        const languageID = p.parseUShort();
        const nameID = p.parseUShort();
        const property = nameTableNames[nameID] || nameID;
        const byteLength = p.parseUShort();
        const offset = p.parseUShort();
        const language = getLanguageCode(platformID, languageID, ltag);
        const encoding = getEncoding(platformID, encodingID, languageID);
        if (encoding !== undefined && language !== undefined) {
            let text;
            if (encoding === utf16) {
                text = decode.UTF16(data, stringOffset + offset, byteLength);
            } else {
                text = decode.MACSTRING(
                    data,
                    stringOffset + offset,
                    byteLength,
                    encoding
                );
            }

            if (text) {
                let translations = name[property];
                if (translations === undefined) {
                    translations = name[property] = {};
                }

                translations[language] = text;
            }
        }
    }

    let langTagCount = 0;
    if (format === 1) {
        // FIXME: Also handle Microsoft's 'name' table 1.
        langTagCount = p.parseUShort();
    }

    return name;
}

export default { parse: parseNameTable };