diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100755 index 0000000..86d74fe --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,3 @@ +edition = "2021" +max_width = 80 +tab_spaces = 4 diff --git a/Cargo.toml b/Cargo.toml index 59ac9c0..9e4a995 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,11 +10,11 @@ license = "MIT" keywords = ["captcha", "amazon", "amazon-captcha", "amazon-captcha-rs"] categories = ["multimedia::images", "web-programming"] -[dev-dependencies] -reqwest = "0.11.20" -tokio = { version = "1", features = ["full"] } -regex = "1" - [dependencies] image = "0.24.7" bincode = "1.3.3" + +[dev-dependencies] +reqwest = "0.11.20" +tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread"] } +regex = "1.9.6" diff --git a/README.md b/README.md index 255bc8e..472df64 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,12 @@ Amazon captchas exhibit a repetitive nature, with characters consistently less t ## Usage Example ```rust -use amazon_captcha_rs::new_solver; +use amazon_captcha_rs::Solver; let image = image::open("captcha.jpg").unwrap(); -let solver = new_solver(); -let response = solver.resolve_image(&image).unwrap(); +let solver = Solver::new().unwrap(); +let response = solver.resolve_image(&image); assert_eq!(response, "caggpa"); ``` diff --git a/examples/benchmark.rs b/examples/benchmark.rs index d4adea6..57074c1 100644 --- a/examples/benchmark.rs +++ b/examples/benchmark.rs @@ -1,63 +1,46 @@ //! 
Test the precision of the captcha solver -use std::time::Instant; +use std::{fs, time::Instant}; -extern crate amazon_captcha_rs; +use amazon_captcha_rs::Solver; fn main() { - let start = Instant::now(); - let files = std::fs::read_dir("examples/dataset").unwrap(); - let solver = amazon_captcha_rs::new_solver(); + let solver = Solver::new().unwrap(); - let mut solved = 0; - let mut total = 0; + let (mut resolved, mut total) = (0u32, 0u32); let mut times = Vec::new(); - files.for_each(|file| { - let now = Instant::now(); - let file = file.unwrap(); - let path = file.path(); - + for file in fs::read_dir("examples/dataset").unwrap() { + let path = file.unwrap().path(); let expect = path - .as_path() .file_name() - .unwrap() + .unwrap_or_default() .to_str() - .unwrap() - .split(".") + .unwrap_or_default() + .split('.') .next() - .unwrap(); + .unwrap_or_default(); + if expect.len() < 6 { - return; + continue; } total += 1; - let img = image::open(&path).unwrap(); - let Some(result) = solver.resolve_image(&img) else { - println!("{:?}: Failed to resolve", &path.as_path()); - return; - }; + let now = Instant::now(); + let result = solver.resolve_image(&image::open(&path).unwrap()); + times.push(now.elapsed().as_millis()); if expect == result { - solved += 1; + resolved += 1; } else { - println!( - "{:?}: Expect '{}', got '{}'", - &path.as_path(), - expect, - result - ); + eprintln!("{path:?}: Expected '{expect}', got '{result}'"); } + } - times.push(now.elapsed().as_millis()); - }); - - println!("Solved: {}/{}", solved, total); - println!("Precision: {:.2}%", solved as f32 / total as f32 * 100.0); println!( - "Average time: {:.2}ms", + "Resolved: {resolved}/{total}\nPrecision: {:.2}%\nAverage Time: {:.2}ms", + resolved as f32 / total as f32 * 100.0, times.iter().sum::<u128>() as f32 / times.len() as f32 ); - println!("Total time: {:.2}s", start.elapsed().as_secs_f32()); } diff --git a/examples/benchmark_infinite.rs b/examples/benchmark_infinite.rs index 37a5262..8ef7bcc 100644 --- 
a/examples/benchmark_infinite.rs +++ b/examples/benchmark_infinite.rs @@ -1,56 +1,64 @@ //! Benchmark running on an infinite loop. +use std::{error, time::Instant}; + use amazon_captcha_rs::Solver; use image::EncodableLayout; use regex::Regex; +use reqwest::Client; #[tokio::main] -async fn main() -> Result<(), Box<dyn std::error::Error>> { - let solver = amazon_captcha_rs::new_solver(); +async fn main() -> Result<(), Box<dyn error::Error>> { + let solver = Solver::new()?; - let mut resolved = 0; - let mut total = 0; + let code_regex = Regex::new(r#"name="amzn" value="(.*?)" \/>"#)?; + let img_regex = Regex::new(r#"<img src="([^"]*)">"#)?; - loop { - let ok = resolve_captcha(&solver).await?; + let (mut resolved, mut total) = (0u32, 0u32); + let mut times = Vec::new(); + loop { total += 1; - if ok { - resolved += 1; - } - - if total % 10 == 0 { - println!("Resolved: {}/{}", resolved, total); - println!("Precision: {:.2}%", resolved as f32 / total as f32 * 100.0); - } - } -} -async fn resolve_captcha(solver: &Solver) -> Result<bool, Box<dyn std::error::Error>> { - let text = reqwest::get("https://www.amazon.com/errors/validateCaptcha") - .await? - .text() - .await?; + let text = + reqwest::get("https://www.amazon.com/errors/validateCaptcha") + .await? 
+ .text() + .await?; - let img_regex = Regex::new(r#"<img src="([^"]*)">"#)?; - let cap = img_regex.captures(&text).unwrap(); - let url = cap.get(1).unwrap().as_str(); + let code = code_regex.captures(&text).unwrap().get(1).unwrap().as_str(); - let code_regex = Regex::new(r#"name="amzn" value="(.*?)" \/>"#).unwrap(); - let code = code_regex.captures(&text).unwrap().get(1).unwrap().as_str(); + let url = img_regex.captures(&text).unwrap().get(1).unwrap().as_str(); + let img = image::load_from_memory( + reqwest::get(url).await?.bytes().await?.as_bytes(), + )?; - let img = reqwest::get(url).await?.bytes().await?; - let img = image::load_from_memory(img.as_bytes())?; + let now = Instant::now(); + let result = solver.resolve_image(&img); + times.push(now.elapsed().as_millis()); - let Some(result) = solver.resolve_image(&img) else { - return Ok(false); - }; - - let response = reqwest::Client::new() - .get("https://www.amazon.com/errors/validateCaptcha") - .query(&[("amzn", code), ("amzn-r", "/"), ("field-keywords", &result)]) - .send() - .await?; + if Client::new() + .get("https://www.amazon.com/errors/validateCaptcha") + .query(&[ + ("amzn", code), + ("amzn-r", "/"), + ("field-keywords", &result), + ]) + .send() + .await? + .url() + .to_string() + == "https://www.amazon.com/" + { + resolved += 1; + } - Ok(response.url().to_string() == "https://www.amazon.com/") + if total % 10 == 0 { + println!( + "Resolved: {resolved}/{total}\nPrecision: {:.2}%\nAverage Time: {:.2}ms", + resolved as f32 / total as f32 * 100.0, + times.iter().sum::<u128>() as f32 / times.len() as f32 + ); + } + } } diff --git a/examples/dataset.rs b/examples/dataset.rs index f3f67bf..07b8cb7 100644 --- a/examples/dataset.rs +++ b/examples/dataset.rs @@ -1,36 +1,31 @@ //! 
Download captcha image to test precision +use std::{error, fs}; + use regex::Regex; -use std::{fs::File, io::Write}; #[tokio::main] -async fn main() -> Result<(), Box<dyn std::error::Error>> { +async fn main() -> Result<(), Box<dyn error::Error>> { println!("Downloading captcha images..."); - for i in 0..100 { - let img = download_captcha().await?; - - let path = format!("examples/dataset/{}.jpg", i); + let img_regex = Regex::new(r#"<img src="([^"]*)">"#)?; - File::create(path)?.write_all(&img)?; + for i in 0..100 { + let text = + reqwest::get("https://www.amazon.com/errors/validateCaptcha") + .await? + .text() + .await?; + + let url = img_regex.captures(&text).unwrap().get(1).unwrap().as_str(); + + fs::write( + format!("examples/dataset/{i}.jpg"), + reqwest::get(url).await?.bytes().await?, + )?; } println!("Done!"); Ok(()) } - -async fn download_captcha() -> Result<Vec<u8>, Box<dyn std::error::Error>> { - let text = reqwest::get("https://www.amazon.com/errors/validateCaptcha") - .await? - .text() - .await?; - - let re = Regex::new(r#"<img src="([^"]*)">"#)?; - let cap = re.captures(&text).unwrap(); - let url = cap.get(1).unwrap().as_str(); - - let img = reqwest::get(url).await?.bytes().await?; - - Ok(img.to_vec()) -} diff --git a/src/lib.rs b/src/lib.rs index 81a6eac..4d7819e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,211 +1,161 @@ -//! Amazon Captcha Solver -//! -//! This library has been highly inspired by: -//! - a-maliarov/amazoncaptcha (Python) -//! - gopkg-dev/amazoncaptcha (Go) -//! -//! Some methods are re-used from these libraries, -//! the dataset is also re-used from gopkg but -//! converted to uncompressed bincode format which makes -//! it much faster to load. -//! -//! # Example -//! -//! ``` -//! use amazon_captcha_rs::new_solver; -//! -//! let image = image::open("examples/dataset/aatmag.jpg").unwrap(); -//! -//! let solver = new_solver(); -//! let response = solver.resolve_image(&image).unwrap(); -//! -//! assert_eq!(response, "aatmag"); -//! ``` +/*! 
+Amazon Captcha Solver + +This library has been highly inspired by: +- a-maliarov/amazoncaptcha (Python) +- gopkg-dev/amazoncaptcha (Go) + +Some methods are re-used from these libraries, +the dataset is also re-used from gopkg but +converted to uncompressed bincode format which makes +it much faster to load. + +# Example + +``` +use amazon_captcha_rs::Solver; + +let image = image::open("examples/dataset/aatmag.jpg").unwrap(); + +let solver = Solver::new().unwrap(); +let response = solver.resolve_image(&image); + +assert_eq!(response, "aatmag"); +``` +*/ -use image::{imageops, DynamicImage, GenericImage, GrayImage}; use std::collections::HashMap; -use std::error::Error; + +use image::{imageops, DynamicImage, GenericImage, GrayImage}; /// Solver implementation -/// -/// Use `new_solver` to create a new instance pub struct Solver { training_data: HashMap<String, char>, } -/// Create a new solver instance -/// -/// This method will load the training data. -pub fn new_solver() -> Solver { - Solver { - training_data: Solver::load_training_data().unwrap(), - } -} - impl Solver { - /// Load training data from dataset.bin - /// - /// This method will load the training data from the dataset.bin file - /// which is included in the crate. - fn load_training_data() -> Result<HashMap<String, char>, Box<dyn Error>> { - let bytes = include_bytes!("../dataset.bin"); - let training_data: HashMap<String, char> = bincode::deserialize_from(&bytes[..])?; - - Ok(training_data) + /** + Creates a new [`Solver`] using the training data from + `dataset.bin` (included in the crate) + + # Errors + + Refer to [`bincode::Error`] + */ + pub fn new() -> Result<Self, bincode::Error> { + Ok(Solver { + training_data: bincode::deserialize(include_bytes!( + "../dataset.bin" + ))?, + }) } - /// Calculate the similarity between two binary strings - /// - /// This method is used when the exact binary string is not found in the training data. 
- fn bin_similarity(a: &str, b: &str) -> f32 { - let mut score = 0.0; + /** + Resolves a captcha image using training data - for (a, b) in a.chars().zip(b.chars()) { - if a == b { - score += 1.0; - } - } + # Example - score / a.len() as f32 - } + ``` + use amazon_captcha_rs::Solver; - /// Resolve a captcha image - /// - /// This method will try to resolve the captcha image and return the result as a string. - /// - /// # Example - /// - /// ``` - /// use amazon_captcha_rs::new_solver; - /// - /// let image = image::open("examples/dataset/cxkgmg.jpg").unwrap(); - /// - /// let solver = new_solver(); - /// let response = solver.resolve_image(&image).unwrap(); - /// - /// assert_eq!(response, "cxkgmg"); - /// ``` - pub fn resolve_image(&self, image: &DynamicImage) -> Option<String> { - let mut letters = self.extract_letters(image); - let mut result = String::new(); + let image = image::open("examples/dataset/cxkgmg.jpg").unwrap(); - if letters.len() == 7 { - let last_letter = self.merge_images(vec![ - letters.last().unwrap().clone(), - letters.first().unwrap().clone(), - ]); + let solver = Solver::new().unwrap(); + let response = solver.resolve_image(&image); + + assert_eq!(response, "cxkgmg"); + ``` + */ + pub fn resolve_image(&self, image: &DynamicImage) -> String { + let mut letters = extract_letters(image); + if letters.len() == 7 { + letters[6] = merge_images(&letters[6], &letters[0]); letters.remove(0); - letters[5] = last_letter; } - for img in letters { - let mut binary = String::new(); - - for pixel in img.pixels() { - if pixel.0[0] <= 1 { - binary.push('1'); - } else { - binary.push('0'); - } - } + let mut resolved = String::new(); - let mut matching: Option<char> = None; - self.training_data.get(&binary).map(|letter| matching = Some(*letter)); - - if let Some(letter) = matching { - result.push(letter); - } else { - let mut scores: HashMap<char, f32> = HashMap::new(); - self.training_data.iter().for_each(|(key, value)| { - let score = scores.get(value).unwrap_or(&0.0); - 
scores.insert(*value, score.max(Solver::bin_similarity(&binary, key))); - }); - - let mut max_score = 0.0; - let mut max_letter: char = ' '; - - for (letter, score) in scores.iter() { - if score > &max_score { - max_score = *score; - max_letter = *letter; - } - } - - result.push(max_letter); - } + for img in letters { + let binary = img + .pixels() + .map(|pixel| if pixel.0[0] <= 1 { '1' } else { '0' }) + .collect::<String>(); + + resolved.push( + *self + .training_data + .get(&binary) + .unwrap_or_else(|| self.most_similar_letter(&binary)), + ); } - Some(result.to_lowercase()) + resolved.to_lowercase() } - /// Merge multiple images side by side - fn merge_images(&self, images: Vec<GrayImage>) -> GrayImage { - let mut width = 0; - let mut height = 0; - - for image in &images { - let (w, h) = image.dimensions(); - - width += w; - height = h.max(height); - } - - let mut result = GrayImage::new(width, height); - - let mut x = 0; - - for image in images { - let (w, _) = image.dimensions(); - - result.copy_from(&image, x, 0).unwrap(); - - x += w; + /// Returns most similar letter + fn most_similar_letter(&self, letter: &str) -> &char { + let mut max_letter = &' '; + let mut max_score = usize::MIN; + + for (key, value) in &self.training_data { + let score = letter + .chars() + .zip(key.chars()) + .filter(|(a, b)| a == b) + .count(); + + if score > max_score { + max_score = score; + max_letter = value; + } } - result + max_letter } +} - /// Extract letters from an image - /// - /// This method will extract letters from an image and return them as a vector of images. 
- fn extract_letters(&self, image: &DynamicImage) -> Vec<GrayImage> { - let image = imageops::grayscale(image); - let (width, heigth) = image.dimensions(); - - let mut rects: Vec<(u32, u32)> = Vec::new(); - let mut start: Option<u32> = None; +/// Merge two images side by side +fn merge_images(img1: &GrayImage, img2: &GrayImage) -> GrayImage { + let (width1, height1) = img1.dimensions(); + let (width2, height2) = img2.dimensions(); - for x in 0..width { - let mut black = false; + let mut merged_image = + GrayImage::new(width1 + width2, height1.max(height2)); - for y in 0..heigth { - if image.get_pixel(x, y)[0] <= 1 { - black = true; - break; - } - } + merged_image.copy_from(img1, 0, 0).unwrap(); + merged_image.copy_from(img2, width1, 0).unwrap(); - if black { - if start.is_none() { - start = Some(x); - } - } else if let Some(point) = start { - rects.push((point, x)); - start = None; - } - } + merged_image +} - if let Some(point) = start { - rects.push((point, width)); - } +/// Extracts letters from an image +fn extract_letters(img: &DynamicImage) -> Vec<GrayImage> { + let img = imageops::grayscale(img); + let (width, height) = img.dimensions(); - let mut letters: Vec<GrayImage> = Vec::new(); + let mut rects = Vec::new(); + let mut start = None; - for (min_x, max_x) in rects { - letters.push(imageops::crop_imm(&image, min_x, 0, max_x - min_x, heigth).to_image()); + for x in 0..width { + if (0..height).any(|y| img.get_pixel(x, y)[0] <= 1) { + if start.is_none() { + start = Some(x); + } + } else if let Some(point) = start { + rects.push((point, x)); + start = None; } + } - letters + if let Some(point) = start { + rects.push((point, width)); } + + rects + .iter() + .map(|(x1, x2)| { + imageops::crop_imm(&img, *x1, 0, x2 - x1, height).to_image() + }) + .collect() }