From b8a7ab8fa347812fe69d0cce5af475a865221b3b Mon Sep 17 00:00:00 2001 From: prescientmoon Date: Sun, 11 Aug 2024 03:48:20 +0200 Subject: [PATCH] Added more docs to the code! Signed-off-by: prescientmoon --- src/bitmap.rs | 8 ++++++++ src/levenshtein.rs | 4 +++- src/logs.rs | 8 ++++++++ src/recognition/fuzzy_song_name.rs | 12 ++++++++++++ src/recognition/hyperglass.rs | 25 +++++++++++++++++++++++-- src/transform.rs | 9 ++++++++- 6 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/bitmap.rs b/src/bitmap.rs index 65f308a..d0b830b 100644 --- a/src/bitmap.rs +++ b/src/bitmap.rs @@ -1,3 +1,11 @@ +//! This module implements my own bitmap and layout based system. +//! +//! I created those as a result of my annoyance with how bad / limited +//! plotters is at rendering text and creating layouts in a clean manner. +//! +//! There's still stuff to be implemented here, like a cache for glyphs and +//! whatnot, but this does run pretty stably for the b30 renderer. + use freetype::{ bitmap::PixelMode, face::{KerningMode, LoadFlag}, diff --git a/src/levenshtein.rs b/src/levenshtein.rs index 4ca4be8..135e9df 100644 --- a/src/levenshtein.rs +++ b/src/levenshtein.rs @@ -1,4 +1,6 @@ -// Modified version of https://docs.rs/edit-distance/latest/src/edit_distance/lib.rs.html#1-76 +//! Modified version of https://docs.rs/edit-distance/latest/src/edit_distance/lib.rs.html#1-76 +//! The primary modification is providing a no-allocation variant +//! for efficient consecutive calls. /// Similar to `edit_distance`, but takes in a preallocated vec so consecutive calls are efficient. pub fn edit_distance_with(a: &str, b: &str, cur: &mut Vec) -> usize { diff --git a/src/logs.rs b/src/logs.rs index 58e9c91..b86d83e 100644 --- a/src/logs.rs +++ b/src/logs.rs @@ -1,3 +1,11 @@ +//! One of the goals of the bot is to never save user-images to disk (for +//! performance and safety reasons), opting to perform operations in-memory +//! instead. +//! +//! 
While great in practice, this makes debugging much harder. This module +//! allows for a convenient way to throw images into a `logs` directory with +//! a simple env var. + use std::{env, ops::Deref}; use image::{DynamicImage, EncodableLayout, ImageBuffer, PixelWithColorType}; diff --git a/src/recognition/fuzzy_song_name.rs b/src/recognition/fuzzy_song_name.rs index 2ec49f1..32d0286 100644 --- a/src/recognition/fuzzy_song_name.rs +++ b/src/recognition/fuzzy_song_name.rs @@ -1,3 +1,15 @@ +//! This module implements a clunky but reliable way of fuzzy-finding arcaea +//! chart names. This algorithm is left-biased, in case the right half of the +//! name is being covered by character arc. +//! +//! This module also makes use of an +//! extra shorthand system, with shorthands defined in the repo in +//! `data/shorthands.csv` and imported by `scripts/main.py`. The shorthands are +//! useful for non-ascii song names, or when trying to bridge the gap to how +//! the game supposedly refers to some names internally (I do *not* use any +//! databases extracted from the game, but this is still useful for having a +//! "canonical" way to refer to some weirdly-named charts). + use crate::arcaea::chart::{Chart, Difficulty, Song, SongCache}; use crate::context::{Error, UserContext}; use crate::levenshtein::edit_distance_with; diff --git a/src/recognition/hyperglass.rs b/src/recognition/hyperglass.rs index 1595e94..c2d52e1 100644 --- a/src/recognition/hyperglass.rs +++ b/src/recognition/hyperglass.rs @@ -1,3 +1,26 @@ +//! Hyperglass is my own specialized OCR system, created as a result of my +//! annoyance with how unreliable tesseract is. Assuming we know the font, +//! OCR should be almost perfect, even when faced with strange kerning. This is +//! what this module achieves! +//! +//! The algorithm is pretty simple: +//! 1. Find the connected components (i.e., "black areas") in the image. +//! 2. Find the bounding box of each connected component. +//! 3. 
Discard connected components which are too large (these are likely bars, +//! or other artifacts). +//! 4. Sort the components by x-position. +//! 5. Compute the largest width & height of the connected components. +//! 6. Split each component (more precisely, start at its top-left corner and +//! split an area equal to the aforementioned width & height) into a grid of +//! N^2 chunks (N=5 at the moment), and use that to generate a vector whose +//! elements represent the percentage of black pixels in each chunk which +//! belong to the connected component at hand. +//! 7. Normalise the vectors to remain font-weight independent. +//! 8. Find the nearest neighbour of each vector among a list of precomputed +//! vectors for the font in the image, thus reconstructing the string! The +//! aforementioned precomputed vectors are generated using almost the exact +//! procedure described in steps 1-7, except the images are generated at +//! startup using my very own bitmap rendering module (`crate::bitmap`). use freetype::Face; use image::{DynamicImage, ImageBuffer, Luma}; use imageproc::{ @@ -12,8 +35,6 @@ use crate::{ logs::{debug_image_buffer_log, debug_image_log}, }; -///! Hyperglass my own specialized OCR system - // {{{ ConponentVec /// How many sub-segments to split each side into const SPLIT_FACTOR: u32 = 5; diff --git a/src/transform.rs b/src/transform.rs index d1d4250..9272787 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1,3 +1,10 @@ +//! This file implements the "rotation as shearing" algorithm, +//! which can rotate images without making use of any trigonometric +//! functions (or working with floats altogether, if you don't care +//! about antialiasing). +//! +//! 
For more information, consult this article: https://www.ocf.berkeley.edu/~fricke/projects/israel/paeth/rotation_by_shearing.html + use image::{DynamicImage, GenericImage, GenericImageView}; use crate::bitmap::{Position, Rect}; @@ -28,7 +35,7 @@ pub fn xshear(image: &mut DynamicImage, rect: Rect, center: Position, shear: f32 } } -/// Performs a horizontal shear operation, without performing anti-aliasing +/// Performs a vertical shear operation, without performing anti-aliasing pub fn yshear(image: &mut DynamicImage, rect: Rect, center: Position, shear: f32) { let height = rect.height as i32; for x in rect.x..rect.x + rect.width as i32 {