/**
 * Converts a string into a list of tokens.
 */

/**
 * Create a new token
 * @param {string} char a single char
 * @property {string} char the character wrapped by this token
 * @property {Object} state state values keyed by state id
 * @property {?Object} activeState starts as null
 */
function Token(char) {
    this.char = char;
    this.state = {};
    this.activeState = null;
}

/**
 * Create a new context range
 * @param {number} startIndex range start index
 * @param {number} endOffset range end index offset
 * @param {string} contextName owner context name
 */
function ContextRange(startIndex, endOffset, contextName) {
    this.contextName = contextName;
    this.startIndex = startIndex;
    this.endOffset = endOffset;
}

/**
 * Check context start and end
 * @param {string} contextName a unique context name
 * @param {function} checkStart a predicate function that indicates a context's start
 * @param {function} checkEnd a predicate function that indicates a context's end
 * @property {?ContextRange} openRange starts as null (no range currently open)
 * @property {array} ranges starts as an empty list
 */
function ContextChecker(contextName, checkStart, checkEnd) {
    this.contextName = contextName;
    this.openRange = null;
    this.ranges = [];
    this.checkStart = checkStart;
    this.checkEnd = checkEnd;
}

/**
 * @typedef ContextParams
 * @type Object
 * @property {array} context context items
 * @property {number} index current item index
 * @property {number} length number of context items
 * @property {any} current the item at the current index
 * @property {array} backtrack copy of the items before the current index
 * @property {array} lookahead copy of the items after the current index
 */

/**
 * Create a context params
 * @param {array} context a list of items
 * @param {number} currentIndex current item index
 */
function ContextParams(context, currentIndex) {
    this.context = context;
    this.index = currentIndex;
    this.length = context.length;
    this.current = context[currentIndex];
    this.backtrack = context.slice(0, currentIndex);
    this.lookahead = context.slice(currentIndex + 1);
}

/**
 * Create an event instance
 * @param {string} eventId event unique id
 * @property {array} subscribers starts as an empty list of handlers
 */
function Event(eventId) {
    this.eventId = eventId;
    this.subscribers = [];
}

/**
 * Initialize the core events and auto subscribe required event handlers.
 * Must be invoked with `this` bound to the owning object: it reads
 * `this.events` and `this.updateContextsRanges`.
 * @param {any} events an object that enlists core events handlers
 */
function initializeCoreEvents(events) {
    const coreEvents = [
        'start', 'end', 'next', 'newToken', 'contextStart',
        'contextEnd', 'insertToken', 'removeToken', 'removeRange',
        'replaceToken', 'replaceRange', 'composeRUD', 'updateContextsRanges'
    ];

    // Only `value` is given, so every other property attribute takes the
    // Object.defineProperty defaults.
    coreEvents.forEach(eventId => {
        Object.defineProperty(this.events, eventId, {
            value: new Event(eventId)
        });
    });

    // Caller supplied handlers are subscribed first; non-function entries
    // are ignored.
    if (events) {
        coreEvents.forEach(eventId => {
            const handler = events[eventId];
            if (typeof handler === 'function') {
                this.events[eventId].subscribe(handler);
            }
        });
    }

    // Every event that changes the token list also refreshes context ranges.
    const requiresContextUpdate = [
        'insertToken', 'removeToken', 'removeRange',
        'replaceToken', 'replaceRange', 'composeRUD'
    ];
    requiresContextUpdate.forEach(eventId => {
        this.events[eventId].subscribe(
            this.updateContextsRanges
        );
    });
}

/**
 * Converts a string into a list of tokens
 * @param {any} events tokenizer core events
 */
function Tokenizer(events) {
    this.tokens = [];
    this.registeredContexts = {};
    this.contextCheckers = [];
    this.events = {};
    this.registeredModifiers = [];

    // Must run after `this.events` exists: the initializer populates it.
    initializeCoreEvents.call(this, events);
}

/**
 * Sets the state of a token, usually called by a state modifier.
 * @param {string} key state item key
 * @param {any} value state item value
 * @returns {Object} the new active state, a `{ key, value }` pair
 */
Token.prototype.setState = function(key, value) {
    this.state[key] = value;
    this.activeState = { key, value: this.state[key] };
    return this.activeState;
};

/**
 * Gets a state value of a token.
 * @param {string} stateId state item key
 * @returns {any} the stored value, or null when nothing is stored under that key
 */
Token.prototype.getState = function (stateId) {
    // Own-property check so inherited names (e.g. 'toString') are not
    // reported as state.
    if (!Object.prototype.hasOwnProperty.call(this.state, stateId)) {
        return null;
    }
    const value = this.state[stateId];
    // Only a missing value maps to null; falsy values such as 0, false
    // and '' are valid state and are returned unchanged.
    return value === undefined ? null : value;
};

/**
 * Checks if an index exists in the tokens list.
 * @param {number} index token index
 * @returns {boolean} true when 0 <= index < number of tokens
 */
Tokenizer.prototype.inboundIndex = function(index) {
    return index >= 0 && index < this.tokens.length;
};

/**
 * Compose and apply a list of operations (replace, update, delete)
 *
 * Each operation is `[methodName, ...args]`; the method is called on this
 * tokenizer with a trailing `silent = true` argument so that a single
 * 'composeRUD' event is dispatched for the whole batch.
 *
 * @param {array} RUDs replace, update and delete operations
 * @returns {Object|undefined} a `{ FAIL, report }` object when every
 * operation returned a FAIL object (nothing is dispatched in that case);
 * otherwise undefined, after dispatching 'composeRUD' with the results of
 * the operations that did not fail.
 * TODO: Perf. Optimization (lengthBefore === lengthAfter ? dispatch once)
 */
Tokenizer.prototype.composeRUD = function (RUDs) {
    const silent = true;
    const state = RUDs.map(RUD => (
        this[RUD[0]].apply(this, RUD.slice(1).concat(silent))
    ));
    // `typeof null` is 'object', so null results are excluded explicitly
    // instead of crashing on a property lookup.
    const hasFAILObject = obj => (
        obj !== null &&
        typeof obj === 'object' &&
        Object.prototype.hasOwnProperty.call(obj, 'FAIL')
    );
    if (state.every(hasFAILObject)) {
        return {
            FAIL: `composeRUD: one or more operations hasn't completed successfully`,
            report: state.filter(hasFAILObject)
        };
    }
    this.dispatch('composeRUD', [state.filter(op => !hasFAILObject(op))]);
};

/**
 * Replace a range of tokens with a list of tokens
 * @param {number} startIndex range start index
 * @param {number} offset number of tokens to replace; null, undefined or
 * NaN means "as many as the current token list length"
 * @param {array} tokens a list of Token instances to put in place of the range
 * @param {boolean} silent when true, no 'replaceRange' event is dispatched
 * @returns {array|Object} `[replacedTokens, tokens]` on success, or a
 * `{ FAIL }` object when startIndex is out of bounds or tokens is not a
 * list of Token instances
 */
Tokenizer.prototype.replaceRange = function (startIndex, offset, tokens, silent) {
    const deleteCount = (offset == null || isNaN(offset))
        ? this.tokens.length
        : offset;
    const isTokenType = (
        Array.isArray(tokens) &&
        tokens.every(token => token instanceof Token)
    );
    if (!isNaN(startIndex) && this.inboundIndex(startIndex) && isTokenType) {
        const replaced = this.tokens.splice(startIndex, deleteCount, ...tokens);
        // Dispatch the range event (not 'replaceToken') so that
        // 'replaceRange' subscribers are notified with range arguments.
        if (!silent) this.dispatch('replaceRange', [startIndex, deleteCount, tokens]);
        return [replaced, tokens];
    } else {
        return { FAIL: 'replaceRange: invalid tokens or startIndex.' };
    }
};

/**
 * Replace a token with another token
 * @param {number} index token index
 * @param {Token} token a token to replace
 * @param {boolean} silent when true, no 'replaceToken' event is dispatched
 * @returns {array|Object} `[replacedToken, token]` on success, or a
 * `{ FAIL }` object when the index is out of bounds or token is not a Token
 */
Tokenizer.prototype.replaceToken = function (index, token, silent) {
    if (!isNaN(index) && this.inboundIndex(index) && token instanceof Token) {
        const replaced = this.tokens.splice(index, 1, token);
        if (!silent) this.dispatch('replaceToken', [index, token]);
        return [replaced[0], token];
    } else {
        return { FAIL: 'replaceToken: invalid token or index.' };
    }
};

/**
 * Removes a range of tokens
 * @param {number} startIndex range start index
 * @param {number} offset number of tokens to remove; null, undefined or
 * NaN means "as many as the current token list length"
 * @param {boolean} silent when true, no 'removeRange' event is dispatched
 * @returns {array} the removed tokens
 */
Tokenizer.prototype.removeRange = function(startIndex, offset, silent) {
    // `isNaN(null)` is false, so null needs its own check to get the same
    // "to the end" meaning as undefined.
    const deleteCount = (offset == null || isNaN(offset))
        ? this.tokens.length
        : offset;
    const tokens = this.tokens.splice(startIndex, deleteCount);
    if (!silent) this.dispatch('removeRange', [tokens, startIndex, deleteCount]);
    return tokens;
};

/**
 * Remove a token at a certain index
 * @param {number} index token index
 * @param {boolean} silent when true, no 'removeToken' event is dispatched
 * @returns {array|Object} a one-element list holding the removed token, or
 * a `{ FAIL }` object when the index is out of bounds
 */
Tokenizer.prototype.removeToken = function(index, silent) {
    if (!isNaN(index) && this.inboundIndex(index)) {
        // splice returns a list, so the result is `[token]`, not the
        // bare token.
        const token = this.tokens.splice(index, 1);
        if (!silent) this.dispatch('removeToken', [token, index]);
        return token;
    } else {
        return { FAIL: 'removeToken: invalid token index.' };
    }
};

/**
 * Insert a list of tokens at a certain index
 * @param {array} tokens a list of tokens to insert
 * @param {number} index insert the list of tokens at index
 * @param {boolean} silent when true, no 'insertToken' event is dispatched
 * @returns {array|Object} the inserted tokens list, or a `{ FAIL }` object
 * when tokens is not a list of Token instances
 */
Tokenizer.prototype.insertToken = function (tokens, index, silent) {
    // Non-array input is reported as invalid instead of throwing.
    const tokenType = (
        Array.isArray(tokens) &&
        tokens.every(token => token instanceof Token)
    );
    if (tokenType) {
        this.tokens.splice(index, 0, ...tokens);
        if (!silent) this.dispatch('insertToken', [tokens, index]);
        return tokens;
    } else {
        return { FAIL: 'insertToken: invalid token(s).' };
    }
};

/**
 * A state modifier that is called on 'newToken' event
 * @param {string} modifierId state modifier id
 * @param {?function} condition a predicate function that returns true or
 * false; pass null (or omit) to apply the modifier to every token
 * @param {function} modifier a function to update token state
 */
Tokenizer.prototype.registerModifier = function(modifierId, condition, modifier) {
    // A regular function (not an arrow) so that `this` is whatever the
    // event dispatcher binds when it invokes the handler.
    this.events.newToken.subscribe(function(token, contextParams) {
        const handlerArgs = [token, contextParams];
        // The modifier runs only when the predicate returns exactly true;
        // a missing predicate (null or undefined) always passes.
        const canApplyModifier = (
            condition == null ||
            condition.apply(this, handlerArgs) === true
        );
        if (canApplyModifier) {
            const newStateValue = modifier.apply(this, handlerArgs);
            token.setState(modifierId, newStateValue);
        }
    });
    this.registeredModifiers.push(modifierId);
};

/**
 * Subscribe a handler to an event
 * @param {function} eventHandler an event handler function
 * @returns {number|Object} the handler's position in the subscribers list
 * (usable as a subscription id), or a `{ FAIL }` object when eventHandler
 * is not a function
 */
Event.prototype.subscribe = function (eventHandler) {
    if (typeof eventHandler === 'function') {
        // push returns the new length, so the handler sits at length - 1.
        return ((this.subscribers.push(eventHandler)) - 1);
    } else {
        return { FAIL: `invalid '${this.eventId}' event handler`};
    }
};

/**
 * Unsubscribe an event handler
 *
 * Ids that do not point at an existing subscriber (undefined, null,
 * negative, out of range, non-numeric) are ignored, so a bad id can never
 * remove an unrelated handler.
 * NOTE: the handler is spliced out of the list, which moves every later
 * handler down by one position.
 *
 * @param {number} subsId subscription id (a numeric string is accepted too)
 */
Event.prototype.unsubscribe = function (subsId) {
    const index = (typeof subsId === 'string' && subsId.trim() !== '')
        ? Number(subsId)
        : subsId;
    const isValidId = (
        Number.isInteger(index) &&
        index >= 0 &&
        index < this.subscribers.length
    );
    if (isValidId) {
        this.subscribers.splice(index, 1);
    }
};

/**
 * Sets context params current value index and refreshes the values derived
 * from it (`current`, `backtrack`, `lookahead`).
 * @param {number} index context params current value index
 */
ContextParams.prototype.setCurrentIndex = function(index) {
    this.index = index;
    this.current = this.context[index];
    this.backtrack = this.context.slice(0, index);
    this.lookahead = this.context.slice(index + 1);
};

/**
 * Get an item at an offset from the current value
 * example (current value is 3):
 *  1    2   [3]   4    5   |   items values
 * -2   -1    0    1    2   |   offset values
 * @param {number} offset an offset from current value index
 * @returns {any} the item at that offset, or null when the offset falls
 * outside the context (or is not a number)
 */
ContextParams.prototype.get = function (offset) {
    if (offset === 0) {
        return this.current;
    }
    if (offset < 0 && Math.abs(offset) <= this.backtrack.length) {
        // A negative slice start counts from the end of backtrack, i.e.
        // from the item right before the current one.
        return this.backtrack.slice(offset)[0];
    }
    if (offset > 0 && offset <= this.lookahead.length) {
        return this.lookahead[offset - 1];
    }
    return null;
};

/**
 * Converts a context range into a string value
 * @param {ContextRange} range a context range
 * @returns {string|undefined} the joined chars of the range tokens, or
 * undefined when range is not a ContextRange instance
 */
Tokenizer.prototype.rangeToText = function (range) {
    if (range instanceof ContextRange) {
        return (
            this.getRangeTokens(range)
                .map(token => token.char).join('')
        );
    }
};

/**
 * Converts all tokens into a string
 * @returns {string} the chars of all tokens, joined in order
 */
Tokenizer.prototype.getText = function () {
    return this.tokens.map(token => token.char).join('');
};

/**
 * Get a context by name
 * @param {string} contextName context name to get
 * @returns {?Object} the registered context, or null when no context is
 * registered under that name
 */
Tokenizer.prototype.getContext = function (contextName) {
    const contexts = this.registeredContexts;
    // Own-property check: names such as 'toString' or 'constructor' exist
    // on every plain object and must not be mistaken for registered contexts.
    if (!Object.prototype.hasOwnProperty.call(contexts, contextName)) {
        return null;
    }
    return contexts[contextName] || null;
};

/**
 * Subscribes a new event handler to an event
 * @param {string} eventName event name to subscribe to
 * @param {function} eventHandler a function to be invoked on event
 * @returns {any} whatever the event's `subscribe` returns, or null when
 * eventName is not a known event
 */
Tokenizer.prototype.on = function(eventName, eventHandler) {
    const event = this.events[eventName];
    // Same instanceof test as `dispatch`: inherited object members such as
    // 'toString' are truthy but are not events.
    if (event instanceof Event) {
        return event.subscribe(eventHandler);
    } else {
        return null;
    }
};

/**
 * Dispatches an event
 *
 * Every subscriber of the event is invoked in subscription order with
 * `this` set to the tokenizer. Unknown event names are ignored.
 *
 * @param {string} eventName event name
 * @param {array} args list of arguments passed to each handler (optional)
 */
Tokenizer.prototype.dispatch = function(eventName, args) {
    const event = this.events[eventName];
    if (event instanceof Event) {
        event.subscribers.forEach(subscriber => {
            subscriber.apply(this, args || []);
        });
    }
};

/**
 * Register a new context checker
 * @param {string} contextName a unique context name
 * @param {function} contextStartCheck a predicate function that returns true on context start
 * @param {function} contextEndCheck a predicate function that returns true on context end
 * @returns {ContextChecker|Object} the new context checker, or a `{ FAIL }`
 * object when the name is already registered or a check is not a function
 * TODO: call tokenize on registration to update context ranges with the new context.
 */
Tokenizer.prototype.registerContextChecker = function(contextName, contextStartCheck, contextEndCheck) {
    if (this.getContext(contextName)) {
        return {
            FAIL:
            `context name '${contextName}' is already registered.`
        };
    }
    if (typeof contextStartCheck !== 'function') {
        return {
            FAIL:
            `missing context start check.`
        };
    }
    if (typeof contextEndCheck !== 'function') {
        return {
            FAIL:
            `missing context end check.`
        };
    }
    const contextCheckers = new ContextChecker(
        contextName, contextStartCheck, contextEndCheck
    );
    // The same instance is stored by name and in the ordered list.
    this.registeredContexts[contextName] = contextCheckers;
    this.contextCheckers.push(contextCheckers);
    return contextCheckers;
};

/**
 * Gets a context range tokens
 * @param {ContextRange} range a context range
 * @returns {array} a new list holding the tokens from `range.startIndex`
 * up to (not including) `range.startIndex + range.endOffset`
 */
Tokenizer.prototype.getRangeTokens = function(range) {
    const endIndex = range.startIndex + range.endOffset;
    // slice already returns a fresh array, so no extra copy is needed.
    return this.tokens.slice(range.startIndex, endIndex);
};

/**
 * Gets the ranges of a context
 * @param {string} contextName context name
 * @returns {array|Object} the context's ranges list, or a `{ FAIL }` object
 * when no context is registered under that name
 */
Tokenizer.prototype.getContextRanges = function(contextName) {
    const context = this.getContext(contextName);
    if (context) {
        return context.ranges;
    } else {
        return { FAIL: `context checker '${contextName}' is not registered.` };
    }
};

/**
 * Resets context ranges to run context update
 *
 * Every registered context gets a fresh empty `ranges` list and its
 * `openRange` is cleared, so a range that was started but never ended
 * cannot leak its start index into the next scan.
 */
Tokenizer.prototype.resetContextsRanges = function () {
    const registeredContexts = this.registeredContexts;
    Object.keys(registeredContexts).forEach(contextName => {
        const context = registeredContexts[contextName];
        context.ranges = [];
        context.openRange = null;
    });
};

/**
 * Updates context ranges
 *
 * Resets the ranges, re-runs the context checks over the chars of the
 * current tokens (one ContextParams per position), then dispatches
 * 'updateContextsRanges' with the registered contexts.
 */
Tokenizer.prototype.updateContextsRanges = function () {
    this.resetContextsRanges();
    const chars = this.tokens.map(token => token.char);
    for (let i = 0; i < chars.length; i++) {
        const contextParams = new ContextParams(chars, i);
        this.runContextCheck(contextParams);
    }
    this.dispatch('updateContextsRanges', [this.registeredContexts]);
};

/**
 * Sets the end offset of an open range
 *
 * Builds the finished range from the context's open range start index,
 * gives it a `rangeId` of the form `<contextName>.<position in ranges>`,
 * appends it to the context's ranges and closes the open range.
 *
 * @param {number} offset range end offset
 * @param {string} contextName context name
 * @returns {ContextRange} the finished range
 */
Tokenizer.prototype.setEndOffset = function (offset, contextName) {
    const context = this.getContext(contextName);
    const range = new ContextRange(
        context.openRange.startIndex, offset, contextName
    );
    const ranges = context.ranges;
    range.rangeId = `${contextName}.${ranges.length}`;
    ranges.push(range);
    context.openRange = null;
    return range;
};

/**
 * Runs a context check on the current context
 *
 * For every registered checker: opens a range (and dispatches
 * 'contextStart') when none is open and the start check passes, then
 * closes the open range (and dispatches 'contextEnd') when the end check
 * passes. Both can happen for the same position.
 *
 * @param {ContextParams} contextParams current context params
 */
Tokenizer.prototype.runContextCheck = function(contextParams) {
    const index = contextParams.index;
    this.contextCheckers.forEach(contextChecker => {
        const contextName = contextChecker.contextName;
        let openRange = this.getContext(contextName).openRange;
        if (!openRange && contextChecker.checkStart(contextParams)) {
            openRange = new ContextRange(index, null, contextName);
            this.getContext(contextName).openRange = openRange;
            this.dispatch('contextStart', [contextName, index]);
        }
        if (!!openRange && contextChecker.checkEnd(contextParams)) {
            // +1 so the offset counts the current item as part of the range.
            const offset = (index - openRange.startIndex) + 1;
            const range = this.setEndOffset(offset, contextName);
            this.dispatch('contextEnd', [contextName, range]);
        }
    });
};

/**
 * Converts a text into a list of tokens
 *
 * Replaces any previous tokens and context ranges. Dispatches 'start',
 * then for every char 'next' -> context check -> 'newToken', and finally
 * 'end' with the complete token list.
 *
 * @param {string} text a text to tokenize
 * @returns {array} the new list of tokens (also stored on `this.tokens`)
 */
Tokenizer.prototype.tokenize = function (text) {
    this.tokens = [];
    this.resetContextsRanges();
    // Array.from iterates the string, so a surrogate pair yields one item.
    const chars = Array.from(text);
    this.dispatch('start');
    for (let i = 0; i < chars.length; i++) {
        const char = chars[i];
        const contextParams = new ContextParams(chars, i);
        this.dispatch('next', [contextParams]);
        this.runContextCheck(contextParams);
        const token = new Token(char);
        this.tokens.push(token);
        this.dispatch('newToken', [token, contextParams]);
    }
    this.dispatch('end', [this.tokens]);
    return this.tokens;
};

export default Tokenizer;
|
|
export { Token, Event, ContextRange, ContextParams };
|