mirror of
https://github.com/JuanCanham/HackMyResume.git
synced 2024-11-22 08:20:11 +00:00
Improve keyword regex.
Better support for simple keywords like "C" or "R".
This commit is contained in:
parent
cef9a92cb6
commit
47f6aff561
@ -17,6 +17,8 @@ Keyword analysis for HackMyResume.
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
Analyze the resume's use of keywords.
|
Analyze the resume's use of keywords.
|
||||||
|
TODO: BUG: Keyword search regex is inaccurate, especially for one or two
|
||||||
|
letter keywords like "C" or "CLI".
|
||||||
@class keywordInspector
|
@class keywordInspector
|
||||||
*/
|
*/
|
||||||
var keywordInspector = module.exports = {
|
var keywordInspector = module.exports = {
|
||||||
@ -37,19 +39,44 @@ Keyword analysis for HackMyResume.
|
|||||||
*/
|
*/
|
||||||
run: function( rez ) {
|
run: function( rez ) {
|
||||||
|
|
||||||
|
// "Quote" or safely escape a keyword so it can be used as a regex. For
|
||||||
|
// example, if the keyword is "C++", yield "C\+\+".
|
||||||
// http://stackoverflow.com/a/2593661/4942583
|
// http://stackoverflow.com/a/2593661/4942583
|
||||||
function regex_quote(str) {
|
function regex_quote(str) {
|
||||||
return (str + '').replace(/[.?*+^$[\]\\(){}|-]/ig, "\\$&");
|
return (str + '').replace(/[.?*+^$[\]\\(){}|-]/ig, "\\$&");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a searchable plain-text digest of the resume
|
||||||
|
// TODO: BUG: Don't search within keywords for other keywords. Job A
|
||||||
|
// declares the "foo" keyword. Job B declares the "foo & bar" keyword. Job
|
||||||
|
// B's mention of "foobar" should not count as a mention of "foo".
|
||||||
|
// To achieve this, remove keywords from the search digest and treat them
|
||||||
|
// separately.
|
||||||
var searchable = '';
|
var searchable = '';
|
||||||
rez.transformStrings( ['imp', 'computed', 'safe'], function trxString( key, val ) {
|
rez.transformStrings( ['imp', 'computed', 'safe'], function trxString( key, val ) {
|
||||||
searchable += ' ' + val;
|
searchable += ' ' + val;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Assemble a regex skeleton we can use to test for keywords with a bit
|
||||||
|
// more
|
||||||
|
var prefix = '(?:' + ['^', '\\s+', '[\\.,]+'].join('|') + ')';
|
||||||
|
var suffix = '(?:' + ['$', '\\s+', '[\\.,]+'].join('|') + ')';
|
||||||
|
|
||||||
return rez.keywords().map(function(kw) {
|
return rez.keywords().map(function(kw) {
|
||||||
|
|
||||||
|
// 1. Using word boundary or other regex class is inaccurate
|
||||||
|
//
|
||||||
// var regex = new RegExp( '\\b' + regex_quote( kw )/* + '\\b'*/, 'ig');
|
// var regex = new RegExp( '\\b' + regex_quote( kw )/* + '\\b'*/, 'ig');
|
||||||
var regex = new RegExp( regex_quote( kw ), 'ig');
|
//
|
||||||
|
// 2. Searching for the raw keyword is inaccurate ("C" will match any
|
||||||
|
// word containing a 'c'!).
|
||||||
|
//
|
||||||
|
// var regex = new RegExp( regex_quote( kw ), 'ig');
|
||||||
|
//
|
||||||
|
// 3. Instead, use a custom regex with special delimeters.
|
||||||
|
|
||||||
|
var regex_str = prefix + regex_quote( kw ) + suffix;
|
||||||
|
var regex = new RegExp( regex_str, 'ig');
|
||||||
var myArray, count = 0;
|
var myArray, count = 0;
|
||||||
while ((myArray = regex.exec( searchable )) !== null) {
|
while ((myArray = regex.exec( searchable )) !== null) {
|
||||||
count++;
|
count++;
|
||||||
|
Loading…
Reference in New Issue
Block a user