Meu artigo anterior sobre este tópico é
“Como ignorar o controle deslizante captcha usando JS e Puppeteer”
Neste artigo, irei ainda mais longe e resolverei o controle deslizante de captcha de outra maneira. Este método resolve o controle deslizante captcha mais rapidamente e com mais eficiência. A ênfase estará no controle deslizante GeeTest captcha, mas você também pode aplicá-lo a qualquer outro controle deslizante captcha. Vou mostrar como contornar isso em algumas etapas.
1. Aquisição de imagens
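Primeiro, precisamos obter as imagens do captcha do GeeTest. Com o Puppeteer, abrimos a página de demonstração, acionamos o captcha e salvamos em disco as imagens dos elementos canvas.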
const puppeteer = require('puppeteer')
const fs = require ('fs').promises
/**
 * Opens the GeeTest demo page, triggers the slider captcha and saves the
 * captcha canvases to disk as PNG files.
 *
 * Files written (canvas order 0, 1, 2): ./captcha.png, ./puzzle.png,
 * ./original.png.
 *
 * @returns {Promise<void>} Resolves once the images are written and the
 *   browser has been closed.
 */
async function run () {
  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: { width: 1366, height: 768 }
  })

  // Always close the browser, even when a selector times out or a write fails.
  try {
    const page = await browser.newPage()
    await page.goto('https://www.geetest.com/en/demo', { waitUntil: 'networkidle2' })
    // NOTE(review): page.waitFor is deprecated in newer Puppeteer releases —
    // confirm against the installed version before upgrading.
    await page.waitFor(3000)

    // Wait for the same tab element that is clicked on the next line.
    await page.waitForSelector('.tab-item.tab-item-1')
    await page.click('.tab-item.tab-item-1')

    await page.waitForSelector('[aria-label="Click to verify"]')
    await page.waitFor(1000)
    await page.click('[aria-label="Click to verify"]')

    await page.waitForSelector('.geetest_canvas_img canvas', { visible: true })
    await page.waitFor(1000)

    const images = await page.$$eval('.geetest_canvas_img canvas', canvases => {
      // Strip the whole data-URL prefix, including the trailing comma, so that
      // only the base64 payload remains.
      return canvases.map(canvas => canvas.toDataURL().replace(/^data:image\/png;base64,/, ''))
    })

    await fs.writeFile(`./captcha.png`, images[0], 'base64')
    await fs.writeFile(`./puzzle.png`, images[1], 'base64')
    await fs.writeFile(`./original.png`, images[2], 'base64')
  } finally {
    await browser.close()
  }
}

// Report failures explicitly instead of leaving the promise floating.
run().catch(error => {
  console.error(error)
  process.exitCode = 1
})
, -. , , , .
() ()
. , , . . .
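2. Comparação de imagens
Agora precisamos comparar, em JavaScript, a imagem original com a imagem do captcha.
Para isso, usaremos a biblioteca pixelmatch.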
const Jimp = require('jimp')
const pixelmatch = require('pixelmatch')
/**
 * Compares ./original.png with ./captcha.png using pixelmatch and stores the
 * resulting difference image in ./diff.png.
 *
 * @returns {Promise<void>} Resolves once ./diff.png has been written.
 */
async function run () {
  const originalImage = await Jimp.read('./original.png')
  const captchaImage = await Jimp.read('./captcha.png')

  // The diff canvas takes its dimensions from the original image.
  const { width, height } = originalImage.bitmap
  const diffImage = new Jimp(width, height)

  const diffOptions = { includeAA: true, threshold: 0.2 }
  pixelmatch(
    originalImage.bitmap.data,
    captchaImage.bitmap.data,
    diffImage.bitmap.data,
    width,
    height,
    diffOptions
  )

  // Persist the diff; without this the comparison result is discarded.
  await diffImage.writeAsync('./diff.png')
}

// Report failures explicitly instead of leaving the promise floating.
run().catch(error => {
  console.error(error)
  process.exitCode = 1
})
, , , :
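3. Encontrar o centro da diferença
Agora precisamos encontrar, na imagem de diferença, a posição x do centro da área destacada. Para isso, usaremos o OpenCV em JavaScript. Existem algumas opções:
- Versão para JavaScript: opencv.js
- Bindings de Node.js para o OpenCV: opencv4nodejs
- Versão wasm (WebAssembly) do OpenCV para Node.js: opencv-wasm
Para usar o OpenCV no Node, escolhi o opencv-wasm.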
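Primeiro, vamos limpar a imagem de diferença. Aplicamos threshold (limiarização), para obter uma imagem binária; depois erode (erosão), para remover o ruído; e, por fim, dilate (dilatação), para restaurar o tamanho da forma.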
// Read the diff image from disk and hand its bitmap to OpenCV.
const srcImage = await Jimp.read('./diff.png')
const src = cv.matFromImageData(srcImage.bitmap)

// Output matrix, a 5x5 kernel of ones, and the anchor point passed to the
// erode/dilate calls below.
const dst = new cv.Mat()
const kernel = cv.Mat.ones(5, 5, cv.CV_8UC1)
const anchor = new cv.Point(-1, -1)

// Threshold first, then erode and dilate in place on the thresholded result
// (one iteration each).
cv.threshold(src, dst, 127, 255, cv.THRESH_BINARY)
cv.erode(dst, dst, kernel, anchor, 1)
cv.dilate(dst, dst, kernel, anchor, 1)
, , .
// Load the diff image and convert it to an OpenCV matrix.
const srcImage = await Jimp.read('./diff.png')
const src = cv.matFromImageData(srcImage.bitmap)
const dst = new cv.Mat()

// Convert to grayscale in place, then apply an inverted binary threshold.
cv.cvtColor(src, src, cv.COLOR_BGR2GRAY)
cv.threshold(src, dst, 150, 255, cv.THRESH_BINARY_INV)

const contours = new cv.MatVector()
const hierarchy = new cv.Mat()
cv.findContours(dst, contours, hierarchy, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

// Fail with a clear message when nothing was detected, instead of computing
// a centroid from a missing contour.
if (contours.size() === 0) {
  throw new Error('No contour found in ./diff.png')
}

// Centroid of the first contour from its image moments.
const contour = contours.get(0)
const moment = cv.moments(contour)
const cx = Math.floor(moment.m10 / moment.m00)
const cy = Math.floor(moment.m01 / moment.m00)
// cx is what we need
console.log(cx, cy)

// Draw the contour, its center and a label onto the image for inspection.
cv.cvtColor(dst, dst, cv.COLOR_GRAY2BGR)
const redColor = new cv.Scalar(255, 0, 0)
cv.drawContours(dst, contours, 0, redColor)
cv.circle(dst, new cv.Point(cx, cy), 3, redColor)
cv.putText(dst, 'center', new cv.Point(cx + 4, cy + 3), cv.FONT_HERSHEY_SIMPLEX, 0.5, redColor)

// NOTE(review): dst was converted with COLOR_GRAY2BGR above — confirm that its
// channel layout matches what Jimp expects for a raw `data` buffer.
new Jimp({ width: dst.cols, height: dst.rows, data: Buffer.from(dst.data) }).write('./diff.png')
Nota do tradutor: o comentário «// cx is what we need» significa «cx é o valor de que precisamos».
, .
4.
, . . - . , .
https://miro.medium.com/max/998/1*gpjIJHMW9NB06u7uln208A.gif
2 . . , . , - GeeTest.
// Drives the GeeTest slider: saves the captcha images, computes the diff,
// drags the handle to the detected position and then corrects the offset
// using the measured puzzle position.
const browser = await puppeteer.launch({
  headless: false,
  defaultViewport: { width: 1366, height: 768 }
})

// Always close the browser, even if a step in the flow throws.
try {
  const page = await browser.newPage()
  await page.goto('https://www.geetest.com/en/demo', { waitUntil: 'networkidle2' })
  await page.waitFor(1000)

  await saveSliderCaptchaImages(page)
  await saveDiffImage()

  // Only the x coordinate of the diff center is used for the drag.
  const [cx] = await findDiffPosition(page)

  const sliderHandle = await page.$('.geetest_slider_button')
  const handle = await sliderHandle.boundingBox()

  // Press the mouse button at the center of the slider handle.
  let xPosition = handle.x + handle.width / 2
  let yPosition = handle.y + handle.height / 2
  await page.mouse.move(xPosition, yPosition)
  await page.mouse.down()

  // First drag: move towards the detected diff position in 25 steps.
  xPosition = handle.x + cx - handle.width / 2
  yPosition = handle.y + handle.height / 3
  await page.mouse.move(xPosition, yPosition, { steps: 25 })
  await page.waitFor(100)

  // Second drag: correct by the remaining distance between the diff center
  // and the current puzzle position.
  const [cxPuzzle] = await findPuzzlePosition(page)
  xPosition = xPosition + cx - cxPuzzle
  yPosition = handle.y + handle.height / 2
  await page.mouse.move(xPosition, yPosition, { steps: 5 })
  await page.mouse.up()
  // success!
} finally {
  await browser.close()
}
https://miro.medium.com/max/1400/1*t4oovZJFuLKA7i339r7-rw.gif
-, . , Puppeteer .
Se você tentar resolver o captcha muitas vezes, esse método poderá parar de funcionar.
Conclusão
O GeeTest acabará descobrindo como tornar esse controle deslizante de captcha mais difícil ou jogar fora esse miserável controle deslizante de captcha porque ele não protege nada.