1
Fork 0

No longer use tesseract for score OCR (tesseract is terrrrrible)

Signed-off-by: prescientmoon <git@moonythm.dev>
This commit is contained in:
prescientmoon 2024-08-11 03:14:02 +02:00
parent 86e5debe95
commit 4373b6ca62
Signed by: prescientmoon
SSH key fingerprint: SHA256:UUF9JT2s8Xfyv76b8ZuVL7XrmimH4o49p4b+iexbVH4
20 changed files with 1145 additions and 845 deletions

142
Cargo.lock generated
View file

@ -2,6 +2,22 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "ab_glyph"
version = "0.2.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79faae4620f45232f599d9bc7b290f88247a0834162c4495ab2f02d60004adfb"
dependencies = [
"ab_glyph_rasterizer",
"owned_ttf_parser",
]
[[package]]
name = "ab_glyph_rasterizer"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71b1793ee61086797f5c80b6efa2b8ffa6d5dd703f118545808a7f2e27f7046"
[[package]] [[package]]
name = "addr2line" name = "addr2line"
version = "0.22.0" version = "0.22.0"
@ -71,6 +87,15 @@ version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "approx"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "arbitrary" name = "arbitrary"
version = "1.3.2" version = "1.3.2"
@ -989,8 +1014,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"js-sys",
"libc", "libc",
"wasi", "wasi",
"wasm-bindgen",
] ]
[[package]] [[package]]
@ -1204,7 +1231,7 @@ dependencies = [
[[package]] [[package]]
name = "hypertesseract" name = "hypertesseract"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=78dd8ab#78dd8ab1bbab9d7985959a5a8ac2746bce17ff5c" source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=4e05063#4e050634f50a58b9be85018439a0b1a23b59de35"
dependencies = [ dependencies = [
"image 0.25.2", "image 0.25.2",
"sys", "sys",
@ -1297,6 +1324,24 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "imageproc"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2393fb7808960751a52e8a154f67e7dd3f8a2ef9bd80d1553078a7b4e8ed3f0d"
dependencies = [
"ab_glyph",
"approx",
"getrandom",
"image 0.25.2",
"itertools",
"nalgebra",
"num",
"rand",
"rand_distr",
"rayon",
]
[[package]] [[package]]
name = "imgref" name = "imgref"
version = "1.10.1" version = "1.10.1"
@ -1469,6 +1514,16 @@ dependencies = [
"imgref", "imgref",
] ]
[[package]]
name = "matrixmultiply"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a"
dependencies = [
"autocfg",
"rawpointer",
]
[[package]] [[package]]
name = "maybe-rayon" name = "maybe-rayon"
version = "0.1.1" version = "0.1.1"
@ -1553,6 +1608,21 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "nalgebra"
version = "0.32.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4"
dependencies = [
"approx",
"matrixmultiply",
"num-complex",
"num-rational",
"num-traits",
"simba",
"typenum",
]
[[package]] [[package]]
name = "new_debug_unreachable" name = "new_debug_unreachable"
version = "1.0.6" version = "1.0.6"
@ -1720,6 +1790,15 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "owned_ttf_parser"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490d3a563d3122bf7c911a59b0add9389e5ec0f5f0c3ac6b91ff235a0e6a7f90"
dependencies = [
"ttf-parser 0.24.1",
]
[[package]] [[package]]
name = "parking" name = "parking"
version = "2.2.0" version = "2.2.0"
@ -1842,7 +1921,7 @@ dependencies = [
"plotters-backend", "plotters-backend",
"plotters-bitmap", "plotters-bitmap",
"plotters-svg", "plotters-svg",
"ttf-parser", "ttf-parser 0.15.2",
"wasm-bindgen", "wasm-bindgen",
"web-sys", "web-sys",
] ]
@ -2017,6 +2096,16 @@ dependencies = [
"getrandom", "getrandom",
] ]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand",
]
[[package]] [[package]]
name = "rav1e" name = "rav1e"
version = "0.7.1" version = "0.7.1"
@ -2067,6 +2156,12 @@ dependencies = [
"rgb", "rgb",
] ]
[[package]]
name = "rawpointer"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.10.0" version = "1.10.0"
@ -2329,6 +2424,15 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "safe_arch"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a"
dependencies = [
"bytemuck",
]
[[package]] [[package]]
name = "same-file" name = "same-file"
version = "1.0.6" version = "1.0.6"
@ -2497,6 +2601,7 @@ dependencies = [
"freetype-rs", "freetype-rs",
"hypertesseract", "hypertesseract",
"image 0.25.2", "image 0.25.2",
"imageproc",
"num", "num",
"plotters", "plotters",
"poise", "poise",
@ -2514,6 +2619,19 @@ dependencies = [
"rand_core", "rand_core",
] ]
[[package]]
name = "simba"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae"
dependencies = [
"approx",
"num-complex",
"num-traits",
"paste",
"wide",
]
[[package]] [[package]]
name = "simd-adler32" name = "simd-adler32"
version = "0.3.7" version = "0.3.7"
@ -2859,7 +2977,7 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]] [[package]]
name = "sys" name = "sys"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=78dd8ab#78dd8ab1bbab9d7985959a5a8ac2746bce17ff5c" source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=4e05063#4e050634f50a58b9be85018439a0b1a23b59de35"
dependencies = [ dependencies = [
"openssl-sys", "openssl-sys",
"pkg-config", "pkg-config",
@ -2927,7 +3045,7 @@ dependencies = [
[[package]] [[package]]
name = "thin" name = "thin"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=78dd8ab#78dd8ab1bbab9d7985959a5a8ac2746bce17ff5c" source = "git+https://github.com/BlueGhostGH/hypertesseract.git?rev=4e05063#4e050634f50a58b9be85018439a0b1a23b59de35"
dependencies = [ dependencies = [
"sys", "sys",
] ]
@ -3188,6 +3306,12 @@ version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b3e06c9b9d80ed6b745c7159c40b311ad2916abb34a49e9be2653b90db0d8dd" checksum = "7b3e06c9b9d80ed6b745c7159c40b311ad2916abb34a49e9be2653b90db0d8dd"
[[package]]
name = "ttf-parser"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be21190ff5d38e8b4a2d3b6a3ae57f612cc39c96e83cedeaf7abc338a8bac4a"
[[package]] [[package]]
name = "tungstenite" name = "tungstenite"
version = "0.21.0" version = "0.21.0"
@ -3496,6 +3620,16 @@ dependencies = [
"wasite", "wasite",
] ]
[[package]]
name = "wide"
version = "0.7.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "901e8597c777fa042e9e245bd56c0dc4418c5db3f845b6ff94fbac732c6a0692"
dependencies = [
"bytemuck",
"safe_arch",
]
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"

View file

@ -11,8 +11,9 @@ num = "0.4.3"
plotters = { git="https://github.com/starlitcanopy/plotters.git", rev="986cd959362a2dbec8d1b25670fd083b904d7b8c", features=["bitmap_backend"] } plotters = { git="https://github.com/starlitcanopy/plotters.git", rev="986cd959362a2dbec8d1b25670fd083b904d7b8c", features=["bitmap_backend"] }
poise = "0.6.1" poise = "0.6.1"
sqlx = { version = "0.8.0", features = ["sqlite", "runtime-tokio", "chrono"] } sqlx = { version = "0.8.0", features = ["sqlite", "runtime-tokio", "chrono"] }
hypertesseract = { features=["image"], git="https://github.com/BlueGhostGH/hypertesseract.git", rev="78dd8ab" } hypertesseract = { features=["image"], git="https://github.com/BlueGhostGH/hypertesseract.git", rev="4e05063" }
tokio = {version="1.38.0", features=["rt-multi-thread"]} tokio = {version="1.38.0", features=["rt-multi-thread"]}
imageproc = "0.25.0"
[profile.dev.package."*"] [profile.dev.package."*"]
opt-level = 3 opt-level = 3

File diff suppressed because it is too large Load diff

22
scripts/import_jacket.sh Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Downloads the image at <url> and stores it as `base.jpg`, plus a 256x256
# resized copy `base_256.jpg`, inside $SHIMMERING_DATA_DIR/songs/<name>.
#
# Relies on the `http` and `convert` commands being available on PATH.

# Stop at the first failing step so that later commands (in particular
# `rm temp`) never run in the wrong directory after a failed `cd`.
set -euo pipefail

if [ "$#" != 2 ]; then
    echo "Usage: $0 <name> <url>"
    exit 1
fi

name=$1
url=$2
curr=$(pwd)

# Refuse to run with an unset/empty data dir instead of silently writing to
# `/songs/<name>`.
dir_path="${SHIMMERING_DATA_DIR:?SHIMMERING_DATA_DIR is not set}/songs/$name"

# `-p` keeps re-importing into an already existing song directory working now
# that errors are fatal. Quoting keeps names with spaces in one piece.
mkdir -p "$dir_path"
cd "$dir_path"

http GET "$url" > temp
convert ./temp ./base.jpg
convert ./base.jpg -resize 256x256 ./base_256.jpg
rm temp

cd "$curr"

View file

@ -52,7 +52,7 @@ def import_charts_from_csv():
[note_design, level, cc, note_count] = charts[i * 4 : (i + 1) * 4] [note_design, level, cc, note_count] = charts[i * 4 : (i + 1) * 4]
if note_design == "N/A": if note_design == "N/A":
continue continue
chart_count += 2 chart_count += 1
[difficulty, level] = level.split(" ") [difficulty, level] = level.split(" ")

View file

@ -1,5 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
dir_path=./data/songs dir_path=$SHIMMERING_DATA_DIR/songs
# Find all files in the directory and its subdirectories # Find all files in the directory and its subdirectories
find "$dir_path" -type f | while read -r file; do find "$dir_path" -type f | while read -r file; do

4
scripts/reimport-songs.sh Executable file
View file

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Empties the `songs` and `charts` tables of $SHIMMERING_DATA_DIR/db.sqlite and
# then runs `./scripts/main.py import charts`.

# Abort on the first failure so the import never runs on top of tables that
# could not be cleared.
set -euo pipefail

# Refuse to run with an unset/empty data dir instead of touching `/db.sqlite`.
db_path="${SHIMMERING_DATA_DIR:?SHIMMERING_DATA_DIR is not set}/db.sqlite"

echo "delete from songs" | sqlite3 "$db_path"
echo "delete from charts" | sqlite3 "$db_path"

./scripts/main.py import charts

View file

@ -89,6 +89,8 @@ pub struct Song {
pub id: u32, pub id: u32,
pub title: String, pub title: String,
pub lowercase_title: String, pub lowercase_title: String,
#[allow(dead_code)]
pub artist: String, pub artist: String,
pub bpm: String, pub bpm: String,

View file

@ -22,7 +22,7 @@ pub struct ImageVec {
impl ImageVec { impl ImageVec {
// {{{ (Image => vector) encoding // {{{ (Image => vector) encoding
fn from_image(image: &impl GenericImageView<Pixel = Rgba<u8>>) -> ImageVec { fn from_image(image: &impl GenericImageView<Pixel = Rgba<u8>>) -> Self {
let mut colors = [0.0; IMAGE_VEC_DIM]; let mut colors = [0.0; IMAGE_VEC_DIM];
let chunk_width = image.width() / SPLIT_FACTOR; let chunk_width = image.width() / SPLIT_FACTOR;
let chunk_height = image.height() / SPLIT_FACTOR; let chunk_height = image.height() / SPLIT_FACTOR;
@ -176,6 +176,16 @@ impl JacketCache {
} }
} }
for chart in song_cache.charts() {
if chart.cached_jacket.is_none() {
println!(
"No jacket found for '{} [{:?}]'",
song_cache.lookup_song(chart.song_id)?.song.title,
chart.difficulty
)
}
}
jacket_vectors jacket_vectors
}; };

View file

@ -170,162 +170,52 @@ impl Score {
} }
// }}} // }}}
// {{{ Scores & Distribution => score // {{{ Scores & Distribution => score
pub fn resolve_ambiguities( pub fn resolve_distibution_ambiguities(
scores: Vec<Score>, score: Score,
read_distribution: Option<(u32, u32, u32)>, read_distribution: Option<(u32, u32, u32)>,
note_count: u32, note_count: u32,
) -> Result<(Score, Option<u32>, Option<&'static str>), Error> { ) -> Option<u32> {
if scores.len() == 0 { let read_distribution = read_distribution?;
return Err("No scores in list to disambiguate from.")?; let pures = read_distribution.0;
} let fars = read_distribution.1;
let losts = read_distribution.2;
let mut no_shiny_scores: Vec<_> = scores // {{{ Compute score from note breakdown subpairs
.iter() let pf_score = Score::compute_naive(note_count, pures, fars);
.map(|score| score.forget_shinies(note_count)) let fl_score = Score::compute_naive(
.collect(); note_count,
no_shiny_scores.sort(); note_count.checked_sub(losts + fars).unwrap_or(0),
no_shiny_scores.dedup(); fars,
);
if let Some(read_distribution) = read_distribution { let lp_score = Score::compute_naive(
let pures = read_distribution.0; note_count,
let fars = read_distribution.1; pures,
let losts = read_distribution.2; note_count.checked_sub(losts + pures).unwrap_or(0),
);
// Compute score from note breakdown subpairs // }}}
let pf_score = Score::compute_naive(note_count, pures, fars); // {{{ Look for consensus among recomputed scores
let fl_score = Score::compute_naive( // Lemma: if two computed scores agree, then so will the third
note_count, if pf_score == fl_score {
note_count.checked_sub(losts + fars).unwrap_or(0), Some(fars)
fars,
);
let lp_score = Score::compute_naive(
note_count,
pures,
note_count.checked_sub(losts + pures).unwrap_or(0),
);
if no_shiny_scores.len() == 1 {
// {{{ Score is fixed, gotta figure out the exact distribution
let score = *scores.iter().max().unwrap();
// {{{ Look for consensus among recomputed scores
// Lemma: if two computed scores agree, then so will the third
let consensus_fars = if pf_score == fl_score {
Some(fars)
} else {
// Due to the above lemma, we know all three scores must be distinct by
// this point.
//
// Our strategy is to check which of the three scores agrees with the real
// score, and to then trust the `far` value that contributed to that pair.
let no_shiny_score = score.forget_shinies(note_count);
let pf_appears = no_shiny_score == pf_score;
let fl_appears = no_shiny_score == fl_score;
let lp_appears = no_shiny_score == lp_score;
match (pf_appears, fl_appears, lp_appears) {
(true, false, false) => Some(fars),
(false, true, false) => Some(fars),
(false, false, true) => Some(note_count - pures - losts),
_ => None,
}
};
// }}}
if scores.len() == 1 {
Ok((score, consensus_fars, None))
} else {
Ok((score, consensus_fars, Some("Due to a reading error, I could not make sure the shiny-amount I calculated is accurate!")))
}
// }}}
} else {
// {{{ Score is not fixed, gotta figure out everything at once
// Some of the values in the note distribution are likely wrong (due to reading
// errors). To get around this, we take each pair from the triplet, compute the score
// it induces, and figure out if there's any consensus as to which value in the
// provided score list is the real one.
//
// Note that sometimes the note distribution cannot resolve any of the issues. This is
// usually the case when the disagreement comes from the number of shinies.
// {{{ Look for consensus among recomputed scores
// Lemma: if two computed scores agree, then so will the third
let (trusted_pure_count, consensus_computed_score, consensus_fars) = if pf_score
== fl_score
{
(true, pf_score, fars)
} else {
// Due to the above lemma, we know all three scores must be distinct by
// this point.
//
// Our strategy is to check which of the three scores appear in the
// provided score list.
let pf_appears = no_shiny_scores.contains(&pf_score);
let fl_appears = no_shiny_scores.contains(&fl_score);
let lp_appears = no_shiny_scores.contains(&lp_score);
match (pf_appears, fl_appears, lp_appears) {
(true, false, false) => (true, pf_score, fars),
(false, true, false) => (false, fl_score, fars),
(false, false, true) => (true, lp_score, note_count - pures - losts),
_ => Err(format!("Cannot disambiguate scores {:?}. Multiple disjoint note breakdown subpair scores appear on the possibility list", scores))?
}
};
// }}}
// {{{ Collect all scores that agree with the consensus score.
let agreement: Vec<_> = scores
.iter()
.filter(|score| score.forget_shinies(note_count) == consensus_computed_score)
.filter(|score| {
let shinies = score.shinies(note_count);
shinies <= note_count && (!trusted_pure_count || shinies <= pures)
})
.map(|v| *v)
.collect();
// }}}
// {{{ Case 1: Disagreement in the amount of shinies!
if agreement.len() > 1 {
let agreement_shiny_amounts: Vec<_> =
agreement.iter().map(|v| v.shinies(note_count)).collect();
println!(
"Shiny count disagreement. Possible scores: {:?}. Possible shiny amounts: {:?}, Read distribution: {:?}",
scores, agreement_shiny_amounts, read_distribution
);
let msg = Some(
"Due to a reading error, I could not make sure the shiny-amount I calculated is accurate!"
);
Ok((
agreement.into_iter().max().unwrap(),
Some(consensus_fars),
msg,
))
// }}}
// {{{ Case 2: Total agreement!
} else if agreement.len() == 1 {
Ok((agreement[0], Some(consensus_fars), None))
// }}}
// {{{ Case 3: No agreement!
} else {
Err(format!("Could not disambiguate between possible scores {:?}. Note distribution does not agree with any possibility, leading to a score of {:?}.", scores, consensus_computed_score))?
}
// }}}
// }}}
}
} else { } else {
if no_shiny_scores.len() == 1 { // Due to the above lemma, we know all three scores must be distinct by
if scores.len() == 1 { // this point.
Ok((scores[0], None, None)) //
} else { // Our strategy is to check which of the three scores agrees with the real
Ok((scores.into_iter().max().unwrap(), None, Some("Due to a reading error, I could not make sure the shiny-amount I calculated is accurate!"))) // score, and to then trust the `far` value that contributed to that pair.
} let no_shiny_score = score.forget_shinies(note_count);
} else { let pf_appears = no_shiny_score == pf_score;
Err("Cannot disambiguate between more than one score without a note distribution.")? let fl_appears = no_shiny_score == fl_score;
let lp_appears = no_shiny_score == lp_score;
match (pf_appears, fl_appears, lp_appears) {
(true, false, false) => Some(fars),
(false, true, false) => Some(fars),
(false, false, true) => Some(note_count - pures - losts),
_ => None,
} }
} }
// }}}
} }
// }}} // }}}
// {{{ Display self with diff // {{{ Display self with diff

View file

@ -19,7 +19,7 @@ pub fn get_assets_dir() -> PathBuf {
#[inline] #[inline]
fn get_font(name: &str) -> RefCell<Face> { fn get_font(name: &str) -> RefCell<Face> {
let face = FREETYPE_LIB.with(|lib| { let face = FREETYPE_LIB.with(|lib| {
lib.new_face(get_assets_dir().join(format!("{}-variable.ttf", name)), 0) lib.new_face(get_assets_dir().join(format!("{}.ttf", name)), 0)
.expect(&format!("Could not load {} font", name)) .expect(&format!("Could not load {} font", name))
}); });
RefCell::new(face) RefCell::new(face)
@ -27,8 +27,9 @@ fn get_font(name: &str) -> RefCell<Face> {
thread_local! { thread_local! {
pub static FREETYPE_LIB: Library = Library::init().unwrap(); pub static FREETYPE_LIB: Library = Library::init().unwrap();
pub static SAIRA_FONT: RefCell<Face> = get_font("saira"); pub static SAIRA_FONT: RefCell<Face> = get_font("saira-variable");
pub static EXO_FONT: RefCell<Face> = get_font("exo"); pub static EXO_FONT: RefCell<Face> = get_font("exo-variable");
pub static GEOSANS_FONT: RefCell<Face> = get_font("geosans-light");
} }
#[inline] #[inline]

View file

@ -140,7 +140,7 @@ fn float_to_ft_fixed(f: f32) -> i64 {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct TextStyle { pub struct TextStyle {
pub size: u32, pub size: u32,
pub weight: u32, pub weight: Option<u32>,
pub color: Color, pub color: Color,
pub align: (Align, Align), pub align: (Align, Align),
pub stroke: Option<(Color, f32)>, pub stroke: Option<(Color, f32)>,
@ -154,6 +154,11 @@ pub struct BitmapCanvas {
} }
impl BitmapCanvas { impl BitmapCanvas {
#[inline]
pub fn height(&self) -> u32 {
self.buffer.len() as u32 / 3 / self.width
}
// {{{ Draw pixel // {{{ Draw pixel
pub fn set_pixel(&mut self, pos: (u32, u32), color: Color) { pub fn set_pixel(&mut self, pos: (u32, u32), color: Color) {
let index = 3 * (pos.1 * self.width + pos.0) as usize; let index = 3 * (pos.1 * self.width + pos.0) as usize;
@ -169,7 +174,7 @@ impl BitmapCanvas {
// {{{ Draw RBG image // {{{ Draw RBG image
/// Draws a bitmap image /// Draws a bitmap image
pub fn blit_rbg(&mut self, pos: Position, (iw, ih): (u32, u32), src: &[u8]) { pub fn blit_rbg(&mut self, pos: Position, (iw, ih): (u32, u32), src: &[u8]) {
let height = self.buffer.len() as u32 / 3 / self.width; let height = self.height();
for dx in 0..iw { for dx in 0..iw {
for dy in 0..ih { for dy in 0..ih {
let x = pos.0 + dx as i32; let x = pos.0 + dx as i32;
@ -190,7 +195,7 @@ impl BitmapCanvas {
// {{{ Draw RGBA image // {{{ Draw RGBA image
/// Draws a bitmap image taking care of the alpha channel. /// Draws a bitmap image taking care of the alpha channel.
pub fn blit_rbga(&mut self, pos: Position, (iw, ih): (u32, u32), src: &[u8]) { pub fn blit_rbga(&mut self, pos: Position, (iw, ih): (u32, u32), src: &[u8]) {
let height = self.buffer.len() as u32 / 3 / self.width; let height = self.height();
for dx in 0..iw { for dx in 0..iw {
for dy in 0..ih { for dy in 0..ih {
let x = pos.0 + dx as i32; let x = pos.0 + dx as i32;
@ -212,7 +217,7 @@ impl BitmapCanvas {
// {{{ Fill // {{{ Fill
/// Fill with solid color /// Fill with solid color
pub fn fill(&mut self, pos: Position, (iw, ih): (u32, u32), color: Color) { pub fn fill(&mut self, pos: Position, (iw, ih): (u32, u32), color: Color) {
let height = self.buffer.len() as u32 / 3 / self.width; let height = self.height();
for dx in 0..iw { for dx in 0..iw {
for dy in 0..ih { for dy in 0..ih {
let x = pos.0 + dx as i32; let x = pos.0 + dx as i32;
@ -233,23 +238,25 @@ impl BitmapCanvas {
text: &str, text: &str,
) -> Result<(Position, Rect, Vec<(i64, Glyph)>), Error> { ) -> Result<(Position, Rect, Vec<(i64, Glyph)>), Error> {
// {{{ Control weight // {{{ Control weight
unsafe { if let Some(weight) = style.weight {
let raw = face.raw_mut() as *mut _; unsafe {
let slice = [(style.weight as i64) << 16]; let raw = face.raw_mut() as *mut _;
let slice = [(weight as i64) << 16];
// {{{ Debug logging // {{{ Debug logging
// let mut amaster = 0 as *mut FT_MM_Var; // let mut amaster = 0 as *mut FT_MM_Var;
// FT_Get_MM_Var(raw, &mut amaster as *mut _); // FT_Get_MM_Var(raw, &mut amaster as *mut _);
// println!("{:?}", *amaster); // println!("{:?}", *amaster);
// println!("{:?}", *(*amaster).axis); // println!("{:?}", *(*amaster).axis);
// println!("{:?}", *(*amaster).namedstyle); // println!("{:?}", *(*amaster).namedstyle);
// }}} // }}}
// Set variable weight // Set variable weight
let err = FT_Set_Var_Design_Coordinates(raw, 3, slice.as_ptr()); let err = FT_Set_Var_Design_Coordinates(raw, 3, slice.as_ptr());
if err != FT_Err_Ok { if err != FT_Err_Ok {
let err: FtResult<_> = Err(err.into()); let err: FtResult<_> = Err(err.into());
err?; err?;
}
} }
} }
// }}} // }}}
@ -418,7 +425,7 @@ impl BitmapCanvas {
#[inline] #[inline]
pub fn new(width: u32, height: u32) -> Self { pub fn new(width: u32, height: u32) -> Self {
let buffer = vec![u8::MAX; 8 * 3 * (width * height) as usize].into_boxed_slice(); let buffer = vec![u8::MAX; 3 * (width * height) as usize].into_boxed_slice();
Self { buffer, width } Self { buffer, width }
} }
} }

View file

@ -81,7 +81,7 @@ pub async fn magic(
}; };
edit_reply!(ctx, handle, "Image {}: reading score", i + 1).await?; edit_reply!(ctx, handle, "Image {}: reading score", i + 1).await?;
let score_possibilities = analyzer.read_score( let score = analyzer.read_score(
ctx.data(), ctx.data(),
Some(chart.note_count), Some(chart.note_count),
&ocr_image, &ocr_image,
@ -89,17 +89,11 @@ pub async fn magic(
)?; )?;
// {{{ Build play // {{{ Build play
let (score, maybe_fars, score_warning) = Score::resolve_ambiguities( let maybe_fars = Score::resolve_distibution_ambiguities(
score_possibilities, score,
note_distribution, note_distribution,
chart.note_count, chart.note_count,
) );
.map_err(|err| {
format!(
"Error occurred when disambiguating scores for '{}' [{:?}] by {}: {}",
song.title, difficulty, song.artist, err
)
})?;
let play = CreatePlay::new(score, &chart, &user) let play = CreatePlay::new(score, &chart, &user)
.with_attachment(file) .with_attachment(file)
@ -110,14 +104,10 @@ pub async fn magic(
// }}} // }}}
// }}} // }}}
// {{{ Deliver embed // {{{ Deliver embed
let (mut embed, attachment) = play let (embed, attachment) = play
.to_embed(&ctx.data().db, &user, &song, &chart, i, None) .to_embed(&ctx.data().db, &user, &song, &chart, i, None)
.await?; .await?;
if let Some(warning) = score_warning {
embed = embed.description(warning);
}
embeds.push(embed); embeds.push(embed);
attachments.extend(attachment); attachments.extend(attachment);
// }}} // }}}
@ -139,9 +129,11 @@ pub async fn magic(
handle.delete(ctx).await?; handle.delete(ctx).await?;
ctx.channel_id() if embeds.len() > 0 {
.send_files(ctx.http(), attachments, CreateMessage::new().embeds(embeds)) ctx.channel_id()
.await?; .send_files(ctx.http(), attachments, CreateMessage::new().embeds(embeds))
.await?;
}
} }
Ok(()) Ok(())

View file

@ -307,7 +307,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
font, font,
crate::bitmap::TextStyle { crate::bitmap::TextStyle {
size: 25, size: 25,
weight: 800, weight: Some(800),
color: Color::WHITE, color: Color::WHITE,
align: (Align::Center, Align::Center), align: (Align::Center, Align::Center),
stroke: None, stroke: None,
@ -327,7 +327,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
let initial_size = 24; let initial_size = 24;
let mut style = crate::bitmap::TextStyle { let mut style = crate::bitmap::TextStyle {
size: initial_size, size: initial_size,
weight: 800, weight: Some(800),
color: Color::WHITE, color: Color::WHITE,
align: (Align::Start, Align::Center), align: (Align::Start, Align::Center),
stroke: Some((Color::BLACK, 1.5)), stroke: Some((Color::BLACK, 1.5)),
@ -404,7 +404,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
font, font,
crate::bitmap::TextStyle { crate::bitmap::TextStyle {
size: 25, size: 25,
weight: 600, weight: Some(600),
color: Color::from_rgb_int(0xffffff), color: Color::from_rgb_int(0xffffff),
align: (Align::Center, Align::Center), align: (Align::Center, Align::Center),
stroke: None, stroke: None,
@ -442,7 +442,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
font, font,
crate::bitmap::TextStyle { crate::bitmap::TextStyle {
size: 23, size: 23,
weight: 800, weight: Some(800),
color: Color::WHITE, color: Color::WHITE,
align: (Align::Start, Align::Center), align: (Align::Start, Align::Center),
stroke: Some((Color::BLACK, 1.5)), stroke: Some((Color::BLACK, 1.5)),
@ -490,7 +490,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
font, font,
crate::bitmap::TextStyle { crate::bitmap::TextStyle {
size: if status == 'M' { 30 } else { 36 }, size: if status == 'M' { 30 } else { 36 },
weight: if status == 'M' { 800 } else { 500 }, weight: Some(if status == 'M' { 800 } else { 500 }),
color: Color::WHITE, color: Color::WHITE,
align: (Align::Center, Align::Center), align: (Align::Center, Align::Center),
stroke: None, stroke: None,
@ -526,7 +526,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
font, font,
crate::bitmap::TextStyle { crate::bitmap::TextStyle {
size: 30, size: 30,
weight: 650, weight: Some(650),
color: Color::from_rgb_int(0x203C6B), color: Color::from_rgb_int(0x203C6B),
align: (Align::Center, Align::Center), align: (Align::Center, Align::Center),
stroke: Some((Color::WHITE, 1.5)), stroke: Some((Color::WHITE, 1.5)),
@ -540,7 +540,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
EXO_FONT.with_borrow_mut(|font| -> Result<(), Error> { EXO_FONT.with_borrow_mut(|font| -> Result<(), Error> {
let mut style = crate::bitmap::TextStyle { let mut style = crate::bitmap::TextStyle {
size: 12, size: 12,
weight: 600, weight: Some(600),
color: Color::WHITE, color: Color::WHITE,
align: (Align::Center, Align::Center), align: (Align::Center, Align::Center),
stroke: None, stroke: None,
@ -556,7 +556,7 @@ pub async fn b30(ctx: Context<'_>) -> Result<(), Error> {
)?; )?;
style.size = 25; style.size = 25;
style.weight = 700; style.weight = Some(700);
drawer.text( drawer.text(
top_left_area, top_left_area,

View file

@ -3,7 +3,9 @@ use std::{fs, path::PathBuf};
use sqlx::SqlitePool; use sqlx::SqlitePool;
use crate::{ use crate::{
arcaea::chart::SongCache, arcaea::jacket::JacketCache, recognition::ui::UIMeasurements, arcaea::{chart::SongCache, jacket::JacketCache},
assets::{EXO_FONT, GEOSANS_FONT},
recognition::{hyperglass::CharMeasurements, ui::UIMeasurements},
}; };
// Types used by all command functions // Types used by all command functions
@ -19,6 +21,9 @@ pub struct UserContext {
pub song_cache: SongCache, pub song_cache: SongCache,
pub jacket_cache: JacketCache, pub jacket_cache: JacketCache,
pub ui_measurements: UIMeasurements, pub ui_measurements: UIMeasurements,
pub geosans_measurements: CharMeasurements,
pub exo_measurements: CharMeasurements,
} }
impl UserContext { impl UserContext {
@ -30,6 +35,10 @@ impl UserContext {
let mut song_cache = SongCache::new(&db).await?; let mut song_cache = SongCache::new(&db).await?;
let jacket_cache = JacketCache::new(&data_dir, &mut song_cache)?; let jacket_cache = JacketCache::new(&data_dir, &mut song_cache)?;
let ui_measurements = UIMeasurements::read(&data_dir)?; let ui_measurements = UIMeasurements::read(&data_dir)?;
let geosans_measurements = GEOSANS_FONT
.with_borrow_mut(|font| CharMeasurements::from_text(font, "0123456789'", None))?;
let exo_measurements = EXO_FONT
.with_borrow_mut(|font| CharMeasurements::from_text(font, "0123456789'", Some(700)))?;
println!("Created user context"); println!("Created user context");
@ -39,6 +48,8 @@ impl UserContext {
song_cache, song_cache,
jacket_cache, jacket_cache,
ui_measurements, ui_measurements,
geosans_measurements,
exo_measurements,
}) })
} }
} }

36
src/logs.rs Normal file
View file

@ -0,0 +1,36 @@
use std::{env, ops::Deref};
use image::{DynamicImage, EncodableLayout, ImageBuffer, PixelWithColorType};
use poise::serenity_prelude::Timestamp;
use crate::context::Error;
/// Reports whether debug image dumps are switched on.
///
/// Only the exact value `"1"` in the `SHIMMERING_DEBUG_IMGS` environment
/// variable enables them; any other value, or a variable that cannot be read,
/// yields `false`.
#[inline]
fn should_save_debug_images() -> bool {
    matches!(env::var("SHIMMERING_DEBUG_IMGS").as_deref(), Ok("1"))
}
/// Writes `image` to `./logs/<current timestamp>.png` when debug image dumps
/// are enabled, and does nothing otherwise.
///
/// The `./logs` directory is not created here.
///
/// # Errors
/// Propagates the error returned by `save`.
#[inline]
pub fn debug_image_log(image: &DynamicImage) -> Result<(), Error> {
    if !should_save_debug_images() {
        return Ok(());
    }

    let path = format!("./logs/{}.png", Timestamp::now());
    image.save(path)?;
    Ok(())
}
/// Same as [`debug_image_log`], but for a raw [`ImageBuffer`]: writes it to
/// `./logs/<current timestamp>.png` when debug image dumps are enabled, and
/// does nothing otherwise.
///
/// The `./logs` directory is not created here.
///
/// # Errors
/// Propagates the error returned by `save`.
#[inline]
pub fn debug_image_buffer_log<P, C>(image: &ImageBuffer<P, C>) -> Result<(), Error>
where
    P: PixelWithColorType,
    [P::Subpixel]: EncodableLayout,
    C: Deref<Target = [P::Subpixel]>,
{
    if !should_save_debug_images() {
        return Ok(());
    }

    let path = format!("./logs/{}.png", Timestamp::now());
    image.save(path)?;
    Ok(())
}

View file

@ -11,6 +11,7 @@ mod bitmap;
mod commands; mod commands;
mod context; mod context;
mod levenshtein; mod levenshtein;
mod logs;
mod recognition; mod recognition;
mod transform; mod transform;
mod user; mod user;

View file

@ -0,0 +1,289 @@
use freetype::Face;
use image::{DynamicImage, ImageBuffer, Luma};
use imageproc::{
contrast::{threshold, ThresholdType},
region_labelling::{connected_components, Connectivity},
};
use num::traits::Euclid;
use crate::{
bitmap::{Align, BitmapCanvas, Color, TextStyle},
context::Error,
logs::{debug_image_buffer_log, debug_image_log},
};
// Hyperglass is my own specialized OCR system.
// {{{ ComponentVec
/// How many sub-segments to split each side into
const SPLIT_FACTOR: u32 = 5;
/// Number of entries in a `ComponentVec`: one per cell of the
/// `SPLIT_FACTOR` x `SPLIT_FACTOR` grid.
const IMAGE_VEC_DIM: usize = (SPLIT_FACTOR * SPLIT_FACTOR) as usize;
/// Feature vector describing the shape of a single connected component.
///
/// Before normalisation, entry `iy * SPLIT_FACTOR + ix` holds the density of
/// component pixels inside cell `(ix, iy)` of a grid anchored at the
/// component's top-left bound (see `ComponentVec::from_component`).
#[derive(Debug, Clone)]
struct ComponentVec {
// One density value per grid cell, stored row by row.
chunks: [f32; IMAGE_VEC_DIM],
}
impl ComponentVec {
    // {{{ (Component => vector) encoding
    /// Encodes connected component number `component` of `components` as a
    /// normalised vector of per-cell pixel densities.
    ///
    /// The grid is anchored at the component's top-left bound, and every cell
    /// spans `max_width / SPLIT_FACTOR` by `max_height / SPLIT_FACTOR`
    /// pixels, so all the components of one image are sampled at the same
    /// scale.
    ///
    /// # Errors
    /// Fails when `component` is the background label (`0`) or when no bounds
    /// are recorded for it.
    fn from_component(components: &ComponentsWithBounds, component: u32) -> Result<Self, Error> {
        // `checked_sub` turns the background label (0) into an error instead
        // of an arithmetic underflow.
        let bounds = (component as usize)
            .checked_sub(1)
            .and_then(|index| components.bounds.get(index))
            .and_then(|o| o.as_ref())
            .ok_or("Missing bounds for given connected component")?;

        let mut chunks = [0.0; IMAGE_VEC_DIM];
        for i in 0..(SPLIT_FACTOR * SPLIT_FACTOR) {
            let (iy, ix) = i.div_rem_euclid(&SPLIT_FACTOR);

            // NOTE(review): the cell ranges are half-open, so the column
            // `x_min + max_width` and the row `y_min + max_height` are never
            // sampled — confirm this is intended.
            let x_start = bounds.x_min + ix * components.max_width / SPLIT_FACTOR;
            let x_end = bounds.x_min + (ix + 1) * components.max_width / SPLIT_FACTOR;
            let y_start = bounds.y_min + iy * components.max_height / SPLIT_FACTOR;
            let y_end = bounds.y_min + (iy + 1) * components.max_height / SPLIT_FACTOR;

            // Pixels outside of the image simply do not count.
            let count = (x_start..x_end)
                .flat_map(|x| (y_start..y_end).map(move |y| (x, y)))
                .filter(|&(x, y)| {
                    components
                        .components
                        .get_pixel_checked(x, y)
                        .is_some_and(|p| p.0[0] == component)
                })
                .count();

            // The `+ 1` terms keep the denominator strictly positive, even
            // for an empty cell, so this division always yields a finite
            // value.
            let size = (x_end + 1 - x_start) * (y_end + 1 - y_start);
            chunks[i as usize] = count as f32 / size as f32;
        }

        let mut result = Self { chunks };
        result.normalise();
        Ok(result)
    }
    // }}}
    // {{{ Distance
    /// Squared euclidean distance between `self` and `other`.
    #[inline]
    fn distance_squared_to(&self, other: &Self) -> f32 {
        self.chunks
            .iter()
            .zip(&other.chunks)
            .fold(0.0, |total, (a, b)| {
                let d = a - b;
                total + d * d
            })
    }

    /// Squared euclidean norm of `self`.
    #[inline]
    fn norm_squared(&self) -> f32 {
        self.chunks.iter().fold(0.0, |total, c| total + c * c)
    }

    /// Scales `self` to unit length.
    ///
    /// A vector whose norm is zero (no component pixel fell inside any cell)
    /// is left untouched: dividing by zero would fill it with NaNs, which
    /// would then poison every distance computed against it.
    #[inline]
    fn normalise(&mut self) {
        let len = self.norm_squared().sqrt();
        if len > 0.0 {
            for chunk in &mut self.chunks {
                *chunk /= len;
            }
        }
    }
    // }}}
}
// }}}
// {{{ Component bounds
/// Axis-aligned bounding box of a connected component, in pixel coordinates.
///
/// Both ends are inclusive: a component made of a single pixel has
/// `x_min == x_max` and `y_min == y_max`.
#[derive(Clone, Copy)]
struct ComponentBounds {
x_min: u32,
y_min: u32,
x_max: u32,
y_max: u32,
}
/// The connected components of an image, together with their bounding boxes.
struct ComponentsWithBounds {
// Label image: every pixel holds the id of the component it belongs to,
// with `0` marking the background.
components: ImageBuffer<Luma<u32>, Vec<u32>>,
// NOTE: the index is (the id of the component) - 1
// This is because the zero component represents the background,
// but we don't want to waste a place in this vector.
// Entries are `None` for components that were discarded for being too wide.
bounds: Vec<Option<ComponentBounds>>,
// Largest `x_max - x_min` among the components that were kept.
max_width: u32,
// Largest `y_max - y_min` among the components that were kept.
max_height: u32,
/// Stores the indices of `self.bounds` sorted based on their min position.
/// Only components that were kept are listed, ordered by `x_min`.
bounds_by_position: Vec<usize>,
}
impl ComponentsWithBounds {
    /// Binarises `image` (threshold of 100), labels its 8-connected
    /// components using pure white as the background, and computes the
    /// bounding box of each of them.
    ///
    /// Components spanning at least 9/10 of the image width are discarded.
    ///
    /// # Errors
    /// Fails when no component is left after that filtering, or when the
    /// debug dump of the binarised image cannot be written.
    fn from_image(image: &DynamicImage) -> Result<Self, Error> {
        let image = threshold(&image.to_luma8(), 100, ThresholdType::Binary);
        debug_image_buffer_log(&image)?;

        let components = connected_components(&image, Connectivity::Eight, Luma([u8::MAX]));

        // {{{ Compute the bounds of every component
        let mut bounds: Vec<Option<ComponentBounds>> = Vec::new();
        for (x, y, pixel) in components.enumerate_pixels() {
            // The zero label is the background
            let label = pixel.0[0];
            if label == 0 {
                continue;
            }

            let index = label as usize - 1;
            if bounds.len() <= index {
                bounds.resize(index + 1, None);
            }

            match &mut bounds[index] {
                Some(known) => {
                    known.x_min = known.x_min.min(x);
                    known.x_max = known.x_max.max(x);
                    known.y_min = known.y_min.min(y);
                    known.y_max = known.y_max.max(y);
                }
                slot => {
                    *slot = Some(ComponentBounds {
                        x_min: x,
                        x_max: x,
                        y_min: y,
                        y_max: y,
                    });
                }
            }
        }
        // }}}
        // {{{ Remove components that are too large
        for slot in &mut bounds {
            if slot.is_some_and(|b| (b.x_max - b.x_min) >= 9 * image.width() / 10) {
                *slot = None;
            }
        }
        // }}}
        // {{{ Compute max width/height
        let max_width = bounds
            .iter()
            .flatten()
            .map(|b| b.x_max - b.x_min)
            .max()
            .ok_or("No connected components found")?;
        let max_height = bounds
            .iter()
            .flatten()
            .map(|b| b.y_max - b.y_min)
            .max()
            .ok_or("No connected components found")?;
        // }}}
        // {{{ Order the remaining components from left to right
        let mut bounds_by_position: Vec<usize> = bounds
            .iter()
            .enumerate()
            .filter_map(|(index, slot)| slot.map(|_| index))
            .collect();
        bounds_by_position.sort_by_key(|&index| bounds[index].map(|b| b.x_min));
        // }}}

        Ok(Self {
            components,
            bounds,
            max_width,
            max_height,
            bounds_by_position,
        })
    }
}
// }}}
// {{{ Char measurements
/// Reference feature vectors for a set of characters rendered with one font.
///
/// Built by `CharMeasurements::from_text`, and matched against the components
/// of an image by `CharMeasurements::recognise`.
pub struct CharMeasurements {
// Each known character, paired with the vector measured for it.
chars: Vec<(char, ComponentVec)>,
}
impl CharMeasurements {
// {{{ Creation
pub fn from_text(face: &mut Face, string: &str, weight: Option<u32>) -> Result<Self, Error> {
// These are bad estimates lol
let char_w = 35;
let char_h = 60;
let mut canvas = BitmapCanvas::new(10 + char_w * string.len() as u32, char_h + 10);
canvas.text(
(5, 5),
face,
TextStyle {
stroke: None,
drop_shadow: None,
align: (Align::Start, Align::Start),
size: char_h,
color: Color::BLACK,
weight: None,
},
&string,
)?;
let buffer = ImageBuffer::from_raw(canvas.width, canvas.height(), canvas.buffer.to_vec())
.ok_or_else(|| "Failed to turn buffer into canvas")?;
let image = DynamicImage::ImageRgb8(buffer);
debug_image_log(&image)?;
let components = ComponentsWithBounds::from_image(&image)?;
let mut chars = Vec::with_capacity(string.len());
for (i, char) in string.chars().enumerate() {
chars.push((
char,
ComponentVec::from_component(
&components,
components.bounds_by_position[i] as u32 + 1,
)?,
))
}
Ok(Self { chars })
}
// }}}
// {{{ Recognition
pub fn recognise(&self, image: &DynamicImage) -> Result<String, Error> {
let components = ComponentsWithBounds::from_image(image)?;
let mut result = String::new();
for i in &components.bounds_by_position {
let vec = ComponentVec::from_component(&components, *i as u32 + 1)?;
let best_match = self
.chars
.iter()
.map(|(i, v)| (*i, v, v.distance_squared_to(&vec)))
.min_by(|(_, _, d1), (_, _, d2)| {
d1.partial_cmp(d2).expect("NaN distance encountered")
})
.map(|(i, _, d)| (d.sqrt(), i))
.ok_or_else(|| "No chars in cache")?;
// println!("char '{}', distance {}", best_match.1, best_match.0);
if best_match.0 <= (IMAGE_VEC_DIM * 10) as f32 {
result.push(best_match.1);
}
}
Ok(result)
}
// }}}
}
// }}}

View file

@ -1,3 +1,4 @@
pub mod fuzzy_song_name; pub mod fuzzy_song_name;
pub mod hyperglass;
pub mod recognize; pub mod recognize;
pub mod ui; pub mod ui;

View file

@ -1,14 +1,12 @@
use std::fmt::Display; use std::fmt::Display;
use std::io::Cursor;
use std::str::FromStr; use std::str::FromStr;
use std::{env, fs};
use hypertesseract::{PageSegMode, Tesseract}; use hypertesseract::{PageSegMode, Tesseract};
use image::imageops::{resize, FilterType}; use image::imageops::FilterType;
use image::{DynamicImage, GenericImageView, RgbaImage}; use image::{DynamicImage, GenericImageView};
use image::{ImageBuffer, Rgba}; use image::{ImageBuffer, Rgba};
use num::integer::Roots; use num::integer::Roots;
use poise::serenity_prelude::{CreateAttachment, CreateEmbed, CreateMessage, Timestamp}; use poise::serenity_prelude::{CreateAttachment, CreateEmbed, CreateMessage};
use crate::arcaea::chart::{Chart, Difficulty, Song, DIFFICULTY_MENU_PIXEL_COLORS}; use crate::arcaea::chart::{Chart, Difficulty, Song, DIFFICULTY_MENU_PIXEL_COLORS};
use crate::arcaea::jacket::IMAGE_VEC_DIM; use crate::arcaea::jacket::IMAGE_VEC_DIM;
@ -16,6 +14,7 @@ use crate::arcaea::score::Score;
use crate::bitmap::{Color, Rect}; use crate::bitmap::{Color, Rect};
use crate::context::{Context, Error, UserContext}; use crate::context::{Context, Error, UserContext};
use crate::levenshtein::edit_distance; use crate::levenshtein::edit_distance;
use crate::logs::debug_image_buffer_log;
use crate::recognition::fuzzy_song_name::guess_chart_name; use crate::recognition::fuzzy_song_name::guess_chart_name;
use crate::recognition::ui::{ use crate::recognition::ui::{
ScoreScreenRect, SongSelectRect, UIMeasurementRect, UIMeasurementRect::*, ScoreScreenRect, SongSelectRect, UIMeasurementRect, UIMeasurementRect::*,
@ -47,25 +46,6 @@ impl ImageAnalyzer {
} }
// {{{ Crop // {{{ Crop
#[inline]
fn should_save_debug_images() -> bool {
env::var("SHIMMERING_DEBUG_IMGS")
.map(|s| s == "1")
.unwrap_or(false)
}
fn save_image(&mut self, image: &RgbaImage) -> Result<(), Error> {
self.clear();
let mut cursor = Cursor::new(&mut self.bytes);
image.write_to(&mut cursor, image::ImageFormat::Png)?;
if Self::should_save_debug_images() {
fs::write(format!("./logs/{}.png", Timestamp::now()), &self.bytes)?;
}
Ok(())
}
#[inline] #[inline]
pub fn crop(&mut self, image: &DynamicImage, rect: Rect) -> ImageBuffer<Rgba<u8>, Vec<u8>> { pub fn crop(&mut self, image: &DynamicImage, rect: Rect) -> ImageBuffer<Rgba<u8>, Vec<u8>> {
image image
@ -84,9 +64,7 @@ impl ImageAnalyzer {
self.last_rect = Some((ui_rect, rect)); self.last_rect = Some((ui_rect, rect));
let result = self.crop(image, rect); let result = self.crop(image, rect);
if Self::should_save_debug_images() { debug_image_buffer_log(&result)?;
self.save_image(&result).unwrap();
}
Ok(result) Ok(result)
} }
@ -97,18 +75,17 @@ impl ImageAnalyzer {
ctx: &UserContext, ctx: &UserContext,
image: &DynamicImage, image: &DynamicImage,
ui_rect: UIMeasurementRect, ui_rect: UIMeasurementRect,
size: impl FnOnce(Rect) -> (u32, u32), size: (u32, u32),
) -> Result<ImageBuffer<Rgba<u8>, Vec<u8>>, Error> { ) -> Result<ImageBuffer<Rgba<u8>, Vec<u8>>, Error> {
let rect = ctx.ui_measurements.interpolate(ui_rect, image)?; let rect = ctx.ui_measurements.interpolate(ui_rect, image)?;
let size = size(rect);
self.last_rect = Some((ui_rect, rect)); self.last_rect = Some((ui_rect, rect));
let result = self.crop(image, rect); let result = self.crop(image, rect);
let result = resize(&result, size.0, size.1, FilterType::Nearest); let result = DynamicImage::ImageRgba8(result)
.resize(size.0, size.1, FilterType::Nearest)
.into_rgba8();
if Self::should_save_debug_images() { debug_image_buffer_log(&result)?;
self.save_image(&result).unwrap();
}
Ok(result) Ok(result)
} }
@ -130,8 +107,7 @@ impl ImageAnalyzer {
)); ));
if let Some((ui_rect, rect)) = self.last_rect { if let Some((ui_rect, rect)) = self.last_rect {
let cropped = self.crop(image, rect); self.crop(image, rect);
self.save_image(&cropped)?;
let bytes = std::mem::take(&mut self.bytes); let bytes = std::mem::take(&mut self.bytes);
let error_attachement = CreateAttachment::bytes(bytes, filename); let error_attachement = CreateAttachment::bytes(bytes, filename);
@ -161,14 +137,7 @@ impl ImageAnalyzer {
note_count: Option<u32>, note_count: Option<u32>,
image: &DynamicImage, image: &DynamicImage,
kind: ScoreKind, kind: ScoreKind,
) -> Result<Vec<Score>, Error> { ) -> Result<Score, Error> {
// yes, this was painfully hand-picked
let desired_height = 100;
let x_scaling_factor = match kind {
ScoreKind::SongSelect => 1.0,
ScoreKind::ScoreScreen => 0.666,
};
let image = self.interp_crop_resize( let image = self.interp_crop_resize(
ctx, ctx,
image, image,
@ -176,123 +145,37 @@ impl ImageAnalyzer {
ScoreKind::SongSelect => SongSelect(SongSelectRect::Score), ScoreKind::SongSelect => SongSelect(SongSelectRect::Score),
ScoreKind::ScoreScreen => ScoreScreen(ScoreScreenRect::Score), ScoreKind::ScoreScreen => ScoreScreen(ScoreScreenRect::Score),
}, },
|rect| { (u32::MAX, 100),
(
(rect.width as f32 * desired_height as f32 / rect.height as f32
* x_scaling_factor) as u32,
desired_height,
)
},
)?; )?;
let mut results = vec![]; let measurements = match kind {
for mode in [ ScoreKind::SongSelect => &ctx.exo_measurements,
PageSegMode::SingleWord, ScoreKind::ScoreScreen => &ctx.geosans_measurements,
PageSegMode::RawLine, };
PageSegMode::SingleLine,
PageSegMode::SparseText,
PageSegMode::SingleBlock,
] {
let result: Result<_, Error> = try {
// {{{ Read score using tesseract
let text = Tesseract::builder()
.language(hypertesseract::Language::English)
.whitelist_str("0123456789'/")?
.page_seg_mode(mode)
.assume_numeric_input()
.build()?
.load_image(&image)?
.recognize()?
.get_text()?;
let text: String = text let result = Score(
.trim() measurements
.chars() .recognise(&DynamicImage::ImageRgba8(image))?
.map(|char| if char == '/' { '7' } else { char }) .chars()
.filter(|char| *char != ' ' && *char != '\'') .filter(|c| *c != '\'')
.collect(); .collect::<String>()
.parse()?,
);
let score = u32::from_str_radix(&text, 10)?;
Score(score)
// }}}
};
match result {
Ok(result) => {
results.push(result.0);
}
Err(err) => {
println!("OCR score result error: {}", err);
}
}
}
// {{{ Score correction
// The OCR sometimes fails to read "74" with the arcaea font,
// so we try to detect that and fix it
loop {
let old_stack_len = results.len();
println!("Results {:?}", results);
results = results
.iter()
.flat_map(|result| {
// If the length is correct, we are good to go!
if *result >= 8_000_000 {
vec![*result]
} else {
let mut results = vec![];
for i in [0, 1, 3, 4] {
let d = 10u32.pow(i);
if (*result / d) % 10 == 4 && (*result / d) % 100 != 74 {
let n = d * 10;
results.push((*result / n) * n * 10 + 7 * n + (*result % n));
}
}
results
}
})
.collect();
if old_stack_len == results.len() {
break;
}
}
// }}}
// {{{ Return score if consensus exists // {{{ Return score if consensus exists
// 1. Discard scores that are known to be impossible // 1. Discard scores that are known to be impossible
let mut results: Vec<_> = results if result.0 <= 10_010_000
.into_iter() && note_count.map_or(true, |note_count| {
.filter(|result| { let (zeta, shinies, score_units) = result.analyse(note_count);
8_000_000 <= *result 8_000_000 <= zeta.0
&& *result <= 10_010_000 && zeta.0 <= 10_000_000
&& note_count && shinies <= note_count
.map(|note_count| { && score_units <= 2 * note_count
let (zeta, shinies, score_units) = Score(*result).analyse(note_count); }) {
8_000_000 <= zeta.0 Ok(result)
&& zeta.0 <= 10_000_000 && shinies <= note_count } else {
&& score_units <= 2 * note_count Err(format!("Score {result} is not vaild").into())
})
.unwrap_or(true)
})
.map(|r| Score(r))
.collect();
println!("Results {:?}", results);
// 2. Look for consensus
for result in results.iter() {
if results.iter().filter(|e| **e == *result).count() > results.len() / 2 {
return Ok(vec![*result]);
}
} }
// }}}
// If there's no consensus, we return everything
results.sort();
results.dedup();
println!("Results {:?}", results);
Ok(results)
} }
// }}} // }}}
// {{{ Read difficulty // {{{ Read difficulty
@ -335,24 +218,25 @@ impl ImageAnalyzer {
return Ok(min.1); return Ok(min.1);
} }
let mut ocr = Tesseract::builder() let (text, conf) = Tesseract::builder()
.language(hypertesseract::Language::English) .language(hypertesseract::Language::English)
.page_seg_mode(PageSegMode::RawLine) .page_seg_mode(PageSegMode::RawLine)
.build()?; .build()?
.recognize_text_cloned_with_conf(&self.interp_crop(
ctx,
image,
ScoreScreen(ScoreScreenRect::Difficulty),
)?)?;
ocr.load_image(&self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::Difficulty))?)?
.recognize()?;
let text: &str = &ocr.get_text()?;
let text = text.trim().to_lowercase(); let text = text.trim().to_lowercase();
// let conf = t.mean_text_conf(); if conf < 10 && conf != 0 {
// if conf < 10 && conf != 0 { return Err(format!(
// Err(format!( "Difficulty text is not readable (confidence = {}, text = {}).",
// "Difficulty text is not readable (confidence = {}, text = {}).", conf, text
// conf, text )
// ))?; .into());
// } }
let difficulty = Difficulty::DIFFICULTIES let difficulty = Difficulty::DIFFICULTIES
.iter() .iter()
@ -370,23 +254,21 @@ impl ImageAnalyzer {
ctx: &UserContext, ctx: &UserContext,
image: &DynamicImage, image: &DynamicImage,
) -> Result<ScoreKind, Error> { ) -> Result<ScoreKind, Error> {
let text = Tesseract::builder() let (text, conf) = Tesseract::builder()
.language(hypertesseract::Language::English) .language(hypertesseract::Language::English)
.page_seg_mode(PageSegMode::RawLine) .page_seg_mode(PageSegMode::RawLine)
.build()? .build()?
.load_image(&self.interp_crop(ctx, image, PlayKind)?)? .recognize_text_cloned_with_conf(&self.interp_crop(ctx, image, PlayKind)?)?;
.recognize()?
.get_text()?
.trim()
.to_string();
// let conf = t.mean_text_conf(); let text = text.trim().to_string();
// if conf < 10 && conf != 0 {
// Err(format!( if conf < 10 && conf != 0 {
// "Score kind text is not readable (confidence = {}, text = {}).", return Err(format!(
// conf, text "Score kind text is not readable (confidence = {}, text = {}).",
// ))?; conf, text
// } )
.into());
}
let result = if edit_distance(&text, "Result") < edit_distance(&text, "Select a song") { let result = if edit_distance(&text, "Result") < edit_distance(&text, "Select a song") {
ScoreKind::ScoreScreen ScoreKind::ScoreScreen
@ -404,23 +286,25 @@ impl ImageAnalyzer {
image: &DynamicImage, image: &DynamicImage,
difficulty: Difficulty, difficulty: Difficulty,
) -> Result<(&'a Song, &'a Chart), Error> { ) -> Result<(&'a Song, &'a Chart), Error> {
let text = Tesseract::builder() let (text, conf) = Tesseract::builder()
.language(hypertesseract::Language::English) .language(hypertesseract::Language::English)
.page_seg_mode(PageSegMode::SingleLine) .page_seg_mode(PageSegMode::SingleLine)
.whitelist_str("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.()- ")? .whitelist_str("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.()- ")?
.build()? .build()?
.load_image(&self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::Title))?)? .recognize_text_cloned_with_conf(&self.interp_crop(
.recognize()? ctx,
.get_text()?; image,
ScoreScreen(ScoreScreenRect::Title),
)?)?;
// let conf = t.mean_text_conf(); if conf < 20 && conf != 0 {
// if conf < 20 && conf != 0 { return Err(format!(
// Err(format!( "Title text is not readable (confidence = {}, text = {}).",
// "Title text is not readable (confidence = {}, text = {}).", conf,
// conf, text.trim()
// raw_text.trim() )
// ))?; .into());
// } }
guess_chart_name(&text, &ctx.song_cache, Some(difficulty), false) guess_chart_name(&text, &ctx.song_cache, Some(difficulty), false)
} }
@ -478,23 +362,19 @@ impl ImageAnalyzer {
ctx: &UserContext, ctx: &UserContext,
image: &DynamicImage, image: &DynamicImage,
) -> Result<(u32, u32, u32), Error> { ) -> Result<(u32, u32, u32), Error> {
let mut ocr = Tesseract::builder()
.language(hypertesseract::Language::English)
.page_seg_mode(PageSegMode::SparseText)
.whitelist_str("0123456789")?
.assume_numeric_input()
.build()?;
let mut out = [0; 3]; let mut out = [0; 3];
use ScoreScreenRect::*; use ScoreScreenRect::*;
static KINDS: [ScoreScreenRect; 3] = [Pure, Far, Lost]; static KINDS: [ScoreScreenRect; 3] = [Pure, Far, Lost];
for i in 0..3 { for i in 0..3 {
let text = ocr let text = Tesseract::builder()
.load_image(&self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?)? .language(hypertesseract::Language::English)
.recognize()? .page_seg_mode(PageSegMode::SparseText)
.get_text()?; .whitelist_str("0123456789")?
.assume_numeric_input()
.build()?
.recognize_text_cloned(&self.interp_crop(ctx, image, ScoreScreen(KINDS[i]))?)?;
println!("Raw '{}'", text.trim()); println!("Raw '{}'", text.trim());
out[i] = u32::from_str(&text.trim()).unwrap_or(0); out[i] = u32::from_str(&text.trim()).unwrap_or(0);
@ -510,26 +390,28 @@ impl ImageAnalyzer {
ctx: &'a UserContext, ctx: &'a UserContext,
image: &DynamicImage, image: &DynamicImage,
) -> Result<u32, Error> { ) -> Result<u32, Error> {
let text = Tesseract::builder() let (text, conf) = Tesseract::builder()
.language(hypertesseract::Language::English) .language(hypertesseract::Language::English)
.page_seg_mode(PageSegMode::SingleLine) .page_seg_mode(PageSegMode::SingleLine)
.whitelist_str("0123456789")? .whitelist_str("0123456789")?
.assume_numeric_input() .assume_numeric_input()
.build()? .build()?
.load_image(&self.interp_crop(ctx, image, ScoreScreen(ScoreScreenRect::MaxRecall))?)? .recognize_text_cloned_with_conf(&self.interp_crop(
.recognize()? ctx,
.get_text()?; image,
ScoreScreen(ScoreScreenRect::MaxRecall),
)?)?;
let max_recall = u32::from_str_radix(text.trim(), 10)?; let max_recall = u32::from_str_radix(text.trim(), 10)?;
// let conf = t.mean_text_conf(); if conf < 20 && conf != 0 {
// if conf < 20 && conf != 0 { return Err(format!(
// Err(format!( "Title text is not readable (confidence = {}, text = {}).",
// "Title text is not readable (confidence = {}, text = {}).", conf,
// conf, text.trim()
// raw_text.trim() )
// ))?; .into());
// } }
Ok(max_recall) Ok(max_recall)
} }