-
Notifications
You must be signed in to change notification settings - Fork 13
Improvements for captcha detection (WIP) #65
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
8a9fb06
ba6bd7c
8854d7a
c3a8202
9893626
a685dbe
51e504d
70c9e8c
ee0f85e
3378a0f
880b4aa
89ff19f
13ec477
7a887be
2505216
5e18860
987e65d
c29269a
f0d4810
56edc09
5bf99e3
0619a94
c5b822e
79e49ef
6ac33f7
9f966d0
1f7beb1
ddfba68
5fb5d1c
731c33e
58ff81e
0493f60
68092fe
664e5cf
1ffee08
c23593f
515b97e
d01af67
475ab5e
fc3476c
236c3d5
260cf9e
94aba50
9196ab2
684fc9c
bebdbcc
46a99f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,10 +10,17 @@ function Actions (options) { | |
Actions.prototype = { | ||
TYPES: { | ||
CLICK: 'click', | ||
MOUSE_DOWN: 'mousedown', | ||
MOUSE_UP: 'mouseup', | ||
WAIT: 'wait', | ||
WAIT_FOR_VISIBLE: 'waitForVisible', | ||
WAIT_FOR_PATTERN: 'waitForPattern', | ||
WAIT_FOR_PAGE: 'waitForPage', | ||
TYPE: 'type', | ||
CONDITION: 'conditionalActions', | ||
EXIST: 'exist' | ||
EXIST: 'exist', | ||
BACK: 'back', | ||
PROVIDE_COLLECTION: 'provideCollection' | ||
}, | ||
|
||
/** | ||
|
@@ -32,6 +39,22 @@ Actions.prototype = { | |
return this.performActions(actions, parentSelector); | ||
}, | ||
|
||
/** | ||
* Perform the post-actions attached to a parsing rule | ||
* @param {Rule} rule - rule whose `postActions` property is read | ||
* @param {string} parentSelector - forwarded unchanged to performActions | ||
* @returns {Promise} result of vow.resolve() when the rule has no postActions, | ||
* otherwise whatever performActions returns | ||
*/ | ||
performPostActionsForRule: function (rule, parentSelector) { | ||
var actions = rule.postActions; | ||
|
||
// A rule without postActions is not an error: there is simply nothing to run | ||
if (!actions) { | ||
return vow.resolve(); | ||
} | ||
|
||
return this.performActions(actions, parentSelector); | ||
}, | ||
|
||
/** | ||
* Perform array of actions | ||
* @param {Array} actions | ||
|
@@ -76,34 +99,68 @@ Actions.prototype = { | |
debug('Perform action %o for generated selector %s', action, selector); | ||
|
||
var waitingForPage; | ||
if (action.waitForPage) { | ||
if (action.waitForPage || action.type === this.TYPES.BACK) { | ||
waitingForPage = this.waitForPage(action.waitForPageTimeout); | ||
} else { | ||
waitingForPage = vow.resolve(); | ||
} | ||
|
||
var casesPromise; | ||
if (action.cases) { | ||
casesPromise = this._performCases(action.cases, parentSelector); | ||
} | ||
|
||
var actionPromise; | ||
switch (action.type) { | ||
case this.TYPES.CLICK: | ||
actionPromise = this.click(selector); | ||
break; | ||
|
||
case this.TYPES.MOUSE_DOWN: | ||
actionPromise = this.mousedown(selector); | ||
break; | ||
|
||
case this.TYPES.MOUSE_UP: | ||
actionPromise = this.mouseup(selector); | ||
break; | ||
|
||
case this.TYPES.WAIT: | ||
actionPromise = this.waitElement(selector, action.timeout); | ||
break; | ||
|
||
case this.TYPES.WAIT_FOR_VISIBLE: | ||
actionPromise = this.waitElementIsVisible(selector, action.timeout); | ||
break; | ||
|
||
case this.TYPES.WAIT_FOR_PATTERN: | ||
actionPromise = this.waitForPattern(selector, action.pattern, action.timeout); | ||
break; | ||
|
||
case this.TYPES.WAIT_FOR_PAGE: | ||
actionPromise = this.waitForPage(action.timeout); | ||
break; | ||
|
||
case this.TYPES.TYPE: | ||
actionPromise = this.type(selector, action.text); | ||
break; | ||
|
||
case this.TYPES.CONDITION: | ||
actionPromise = this.performConditionalActions(selector, action.conditions, action.actions); | ||
actionPromise = this.performConditionalActions(selector, action.conditions, action.actions, action.elseActions); | ||
break; | ||
|
||
case this.TYPES.EXIST: | ||
actionPromise = this.exist(selector); | ||
break; | ||
|
||
case this.TYPES.BACK: | ||
actionPromise = this.back(); | ||
break; | ||
|
||
case this.TYPES.PROVIDE_COLLECTION: | ||
debug('Providing collection %o', action.collection); | ||
actionPromise = vow.resolve(action.collection); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess, better give an ability to return not only collection, but full There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep agree, will change it |
||
break; | ||
|
||
default: | ||
var customAction = this._customActions[action.type]; | ||
if (!customAction) { | ||
|
@@ -115,7 +172,46 @@ Actions.prototype = { | |
} | ||
|
||
return vow.all([actionPromise, waitingForPage]).spread(function (result) { | ||
return result; | ||
return casesPromise || result; | ||
}); | ||
}, | ||
|
||
_performCases: function (cases, parentSelector) { | ||
debug('handle several cases in parallel %o', cases); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a very great improvement!!! 🐗 |
||
|
||
var wonCase = null; | ||
var promises = cases.map(function (actions, caseNumber) { | ||
var beginningPromise = this._performAction(actions[0], parentSelector); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's consider the case where we need Based on happened action, we want to perform different actions. To handle this situation we should start waiting for both cases before click, else we are in risk not to add handlers for page loading in time. Compare two code pieces: actions.reduce(
(promise, action) => promise.then(() => performAction(action)),
vow.resolve()
);
click(); and const firstAction = performAction(actions[0])
actions.slice(1).reduce(
(promise, action) => promise.then(() => performAction(action)),
firstAction
);
click(); In first case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understood the profit of it, just care about calling actions[0] without any default value. Could be empty There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, looks like we need to filter empty chains |
||
return actions | ||
.slice(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you have empty |
||
.reduce(function (promise, action, i, array) { | ||
return promise.then(function () { | ||
if (wonCase !== null && array !== cases[wonCase]) { | ||
return vow.reject('Failed actions chain'); | ||
} | ||
|
||
if (action.trueCase) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does it mean? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Imagine we have 3 concurrent action chains:
At some step of each chain we can tell that the chain has won the race (the needed page is loaded, or a popup with the needed context is shown). It is not necessarily the first step; it differs from chain to chain.
And when such an action happens, we must reject all other chains, because they can affect the parsing process. |
||
wonCase = caseNumber; | ||
debug('Won case with actions %o', cases[wonCase]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's forever promise battle 👍 |
||
} | ||
|
||
return this._performAction(action, parentSelector); | ||
}, this); | ||
}.bind(this), beginningPromise) | ||
.then(function (results) { | ||
if (wonCase === null) { | ||
wonCase = caseNumber; | ||
debug('Won case with actions %o', cases[wonCase]); | ||
} | ||
return results; | ||
}, function (reason) { | ||
debug('Chain %o was reject with reason %s', actions, reason); | ||
throw reason; | ||
}); | ||
}, this); | ||
|
||
return vow.any(promises).then(function () { | ||
return promises[wonCase]; | ||
}); | ||
}, | ||
|
||
|
@@ -148,10 +244,47 @@ Actions.prototype = { | |
}, [selector], timeout, interval); | ||
}, | ||
|
||
/** | ||
* Wait until an element is on the page and visible | ||
* @param {string} selector | ||
* @param {number} [timeout] | ||
* @param {number} [interval] | ||
* @returns {Promise} | ||
*/ | ||
waitElementIsVisible: function (selector, timeout, interval) { | ||
debug('._waitElementIsVisible() ' + selector); | ||
return this.wait(/* @covignore */ function (selector) { | ||
var nodes = Array.prototype.slice.call(Sizzle(selector), 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if no selector found on the page? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return nodes.some(function (node) { | ||
return node.offsetWidth !== 0 && node.offsetHeight !== 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really understand, how is that possible to detect element visibility on the page by this code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can't understand because it's the client side )) http://stackoverflow.com/questions/14122013/detect-if-an-element-is-visible-without-using-jquery There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hah, that was rough There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To face the truth, I was surprised too )) |
||
}); | ||
}, function (visible) { | ||
return visible; | ||
}, [selector], timeout, interval); | ||
}, | ||
|
||
/** | ||
* Wait until an element's content matches the pattern | ||
* @param {string} selector | ||
* @param {string} pattern | ||
* @param {number} [timeout] | ||
* @param {number} [interval] | ||
* @returns {Promise} | ||
*/ | ||
waitForPattern: function (selector, pattern, timeout, interval) { | ||
debug('._waitForPattern() %s on selector %s', pattern, selector); | ||
return this.wait(/* @covignore */ function (selector) { | ||
var nodes = Sizzle(selector); | ||
return nodes.length && nodes[0].textContent || ''; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep in mind new feature about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agree, need to make getting content from node more universal |
||
}, function (text) { | ||
return text.match(pattern) !== null; | ||
}, [selector], timeout, interval); | ||
}, | ||
|
||
/** | ||
* Wait until the result of evalFunction is accepted by checkerFunction | ||
* @param {Function} evalFunction | ||
* @param {Function} checkerFunction | ||
* @param {Function} [checkerFunction] | ||
* @param {Array} [args] | ||
* @param {number} [timeout] | ||
* @param {number} [interval] | ||
|
@@ -161,11 +294,25 @@ Actions.prototype = { | |
var deferred = vow.defer(); | ||
args = args || []; | ||
timeout = timeout || 5000; | ||
interval = interval || 0; | ||
interval = interval || 10; | ||
|
||
checkerFunction = checkerFunction || function (result) { | ||
return !!result | ||
}; | ||
|
||
var errback = function (msg) { | ||
clearTimeout(timeoutId); | ||
clearInterval(intervalId); | ||
deferred.reject(new Error('Error during _wait with args ' + args.toString() + ': ' + msg)); | ||
}; | ||
|
||
var timeoutId = setTimeout(function () { | ||
this._env.removeErrback(errback); | ||
clearInterval(intervalId); | ||
deferred.reject(new Error('Timeout for _wait with arguments: ' + args.toString())); | ||
}, timeout); | ||
}.bind(this), timeout); | ||
|
||
this._env.addErrback(errback); | ||
|
||
var evalArgs = args.slice(0); | ||
evalArgs.push(evalFunction); | ||
|
@@ -176,9 +323,10 @@ Actions.prototype = { | |
if (checkerFunction.apply(null, arguments)) { | ||
clearTimeout(timeoutId); | ||
clearInterval(intervalId); | ||
this._env.removeErrback(errback); | ||
deferred.resolve(); | ||
} | ||
}); | ||
}, this); | ||
}.bind(this), interval); | ||
|
||
return deferred.promise(); | ||
|
@@ -215,6 +363,26 @@ Actions.prototype = { | |
}); | ||
}, | ||
|
||
/** | ||
* Perform mousedown on the element matched by selector. | ||
* Logs the selector via debug and delegates to the environment's mousedown. | ||
* @param {string} selector | ||
* @returns {Promise} value returned by this._env.mousedown (presumably a promise; confirm per environment) | ||
*/ | ||
mousedown: function (selector) { | ||
debug('mousedown on %s', selector); | ||
return this._env.mousedown(selector); | ||
}, | ||
|
||
/** | ||
* Perform mouseup on the element matched by selector. | ||
* Logs the selector via debug and delegates to the environment's mouseup. | ||
* @param {string} selector | ||
* @returns {Promise} value returned by this._env.mouseup (presumably a promise; confirm per environment) | ||
*/ | ||
mouseup: function (selector) { | ||
debug('mouseup on %s', selector); | ||
return this._env.mouseup(selector); | ||
}, | ||
|
||
/** | ||
* Type text to the element | ||
* @param {string} selector | ||
|
@@ -249,15 +417,16 @@ Actions.prototype = { | |
* @param {string} selector | ||
* @param {Array} conditions | ||
* @param {Array} actions | ||
* @param {Array} [elseActions] | ||
* @returns {Promise} | ||
*/ | ||
performConditionalActions: function (selector, conditions, actions) { | ||
performConditionalActions: function (selector, conditions, actions, elseActions) { | ||
return this | ||
.performActions(conditions, selector) | ||
.then(function (result) { | ||
if (!result) { | ||
debug('Conditional actions failed with result %s, skip %o', result, actions); | ||
return; | ||
return elseActions ? this.performActions(elseActions, selector) : false; | ||
} | ||
|
||
debug('Conditional actions return %s, go with real some', result); | ||
|
@@ -274,6 +443,13 @@ Actions.prototype = { | |
return this._env.evaluateJs(selector, /* @covignore */ function (selector) { | ||
return Sizzle(selector).length > 0; | ||
}); | ||
}, | ||
|
||
/** | ||
* Navigates to previous page by delegating to the environment's back method | ||
* @returns {*} whatever this._env.back() returns (presumably a promise; confirm per environment) | ||
*/ | ||
back: function () { | ||
return this._env.back(); | ||
} | ||
}; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ var vow = require('vow'), | |
|
||
function Environment(options) { | ||
debug('Initializing...'); | ||
|
||
this._errbacks = []; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. errback is callback for error )) In this particular case it's errbacks for |
||
} | ||
|
||
Environment.prototype = { | ||
|
@@ -50,6 +52,28 @@ Environment.prototype = { | |
*/ | ||
waitForPage: function (timeout) { | ||
throw new Error('You must redefine waitForPage method in child environment'); | ||
}, | ||
|
||
back: function () { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to add those functions to Env? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For browser env it's just |
||
throw new Error('You must redefine back method in child environment'); | ||
}, | ||
|
||
mousedown: function () { | ||
throw new Error('You must redefine back method in child environment'); | ||
}, | ||
|
||
mouseup: function () { | ||
throw new Error('You must redefine back method in child environment'); | ||
}, | ||
|
||
// Register an error callback by appending it to this._errbacks. | ||
// No de-duplication is done: adding the same function twice stores it twice. | ||
addErrback: function (errback) { | ||
this._errbacks.push(errback); | ||
}, | ||
|
||
// Unregister an error callback: this._errbacks is replaced with a filtered copy | ||
// that drops every entry strictly equal (===) to the given errback. | ||
removeErrback: function (errback) { | ||
this._errbacks = this._errbacks.filter(function (e) { | ||
return e !== errback; | ||
}); | ||
} | ||
}; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is it for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sometimes we do not know what the parsing rules are before the actions are performed. We also have conditional actions, and the last action can return a collection to parse; this collection will be attached to the rule the actions are performed for.