diff --git a/Cargo.lock b/Cargo.lock index d431661..35a0e54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,7 +64,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys", ] [[package]] @@ -75,7 +75,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys", ] [[package]] @@ -206,7 +206,7 @@ checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" [[package]] name = "collapsescanner" -version = "1.0.2" +version = "1.0.3" dependencies = [ "byteorder", "clap", @@ -234,20 +234,19 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys", ] [[package]] name = "console" -version = "0.15.11" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "once_cell", "unicode-width", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -546,14 +545,14 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.11" +version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ "console", - "number_prefix", "portable-atomic", "unicode-width", + "unit-prefix", "web-time", ] @@ -655,12 +654,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "once_cell" version = "1.21.4" @@ -855,9 +848,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -868,11 +861,11 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.9" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" dependencies = [ - "serde", + "serde_core", ] [[package]] @@ -954,44 +947,42 @@ checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "toml" -version = "0.8.23" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +checksum = "81f3d15e84cbcd896376e6730314d59fb5a87f31e4b038454184435cd57defee" dependencies = [ - "serde", + "indexmap", + "serde_core", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_parser", + "toml_writer", + "winnow", ] [[package]] name = "toml_datetime" -version = "0.6.11" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ - "serde", + "serde_core", ] [[package]] -name = "toml_edit" -version = "0.22.27" +name = "toml_parser" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "indexmap", - "serde", - "serde_spanned", - "toml_datetime", - "toml_write", "winnow", ] [[package]] -name = "toml_write" -version = "0.1.2" +name = "toml_writer" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" [[package]] name = "typed-path" @@ -1023,6 +1014,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "utf8parse" version = "0.2.2" @@ -1158,7 +1155,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys", ] [[package]] @@ -1167,15 +1164,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -1185,78 +1173,11 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - [[package]] name = "winnow" -version = "0.7.15" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" -dependencies = [ - "memchr", -] +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" [[package]] name = "wit-bindgen" diff --git a/Cargo.toml b/Cargo.toml index 4d74b40..0eb8cd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "collapsescanner" -version = "1.0.2" +version = "1.0.3" edition = "2021" authors = ["dest4590"] default-run = "collapsescanner" @@ -15,13 +15,13 @@ zip = "8.6.0" byteorder = "1.5.0" walkdir = "2.5.0" serde = { version = "1.0.228", features = ["derive"] } -serde_json = "1.0.149" +serde_json = "1.0.150" rayon = "1.12.0" colored = { version = "3.1.1" } clap = { version = "4.6.1", features = ["derive"] } -once_cell = "1.20" -indicatif = "0.17.11" -toml = "0.8.23" +once_cell = "1.21.4" +indicatif = "0.18.4" +toml = "1.1.2" [profile.release] codegen-units = 1 diff --git a/README.md b/README.md index 4d05498..07521a6 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,25 @@ # CollapseScanner -CollapseScanner is a local static scanner for Java jars. Point it at a `.jar`, a `.class` file, or a directory, and it will look for the parts you probably want to inspect first. +CollapseScanner is a fast, static scanner for Java bytecode. Point it at a `.jar`, a `.class` file, or a directory, and it will analyze class structure, bytecode patterns, and archive contents to give you a risk-focused report. -It does not run the sample. It does not decompile everything into source. It reads class structure, bytecode references, strings, and archive contents, then gives you a short risk-focused report. +It does not run the sample. It does not decompile everything into source. It reads class bytecode, extracts method calls and string constants, then gives you a short report highlighting what matters most. ## What it looks for -CollapseScanner checks for: +CollapseScanner detects: -- hardcoded IPv4 and IPv6 addresses -- URLs, suspicious domains, and Discord webhooks -- token-like secrets and hardcoded credentials -- process execution, reflection, dynamic loading, attach APIs, instrumentation, JNA, and `Unsafe` -- high-entropy Base64 or hex blobs -- Unicode name obfuscation -- malformed or tampered class files -- embedded scripts, binaries, native libraries, and suspicious archive entries -- nested archives inside JARs +- **Risky APIs**: Process execution (`Runtime.exec`, `ProcessBuilder`), reflection, dynamic class loading, JNI, `Unsafe`, Java agents, attach APIs +- **Network infrastructure**: IPv4 and IPv6 addresses, URLs, suspicious domains, Discord webhooks, C2 indicators +- **Secrets**: Token-like strings, hardcoded credentials, API keys, database URLs +- **Obfuscation**: Unicode-based name tricks, tampered class files, suspicious compressions +- **Native payloads**: Embedded binaries, native libraries (`.dll`, `.so`), script engines +- **Archive anomalies**: Nested archives, suspicious compression ratios, malformed entries -The goal is triage. If a file is noisy, packed, or reaching out to strange infrastructure, CollapseScanner should make that obvious quickly. +The goal is triage. If a file is reaching out to strange infrastructure, using dangerous APIs, or looks obfuscated, CollapseScanner should flag it quickly. ## Install -You need Rust. +You need Rust 1.70+. ```bash git clone https://github.com/dest4590/CollapseScanner.git @@ -30,11 +27,7 @@ cd CollapseScanner cargo build --release ``` -The binary will be at: - -```bash -target/release/collapsescanner -``` +The binary will be at `target/release/collapsescanner`. ## Usage @@ -45,23 +38,26 @@ collapsescanner # Scan only network indicators collapsescanner --mode network -# Scan only malicious APIs, keywords, and secrets +# Scan only malicious APIs and secrets collapsescanner --mode malicious # Scan only obfuscation signals collapsescanner --mode obfuscation -# Write JSON for another tool +# Write JSON output to a file collapsescanner --json --output report.json # Scan only matching entries collapsescanner mods/ --find "*.class" --exclude "META-INF/*" -# Load repeatable settings from a TOML config file +# Use a fixed worker thread count +collapsescanner sample.jar --threads 8 + +# Load repeatable settings from a TOML config collapsescanner sample.jar --config scanner.toml -# Use a fixed worker count -collapsescanner sample.jar --threads 8 +# Suppress false positives with a keyword list +collapsescanner --ignore_keywords keywords.txt ``` Example `scanner.toml`: @@ -71,19 +67,18 @@ mode = "all" threads = 0 exclude = ["META-INF/*", "**/test/**"] find = ["*.jar", "*.class"] -ignore_keywords = "ignore-keywords.txt" +ignore_keywords = "keywords.txt" ``` ## Output -The normal terminal report starts with a summary, then shows: +The default terminal report includes: -- risk score -- total findings and affected files -- finding breakdown -- severity distribution -- top files to inspect first -- detailed per-file findings +- **Risk score** (0-10) +- **Summary**: total findings and affected files +- **Severity distribution**: count of Critical, High, Medium, Low items +- **Files to inspect**: top targets by danger score +- **Detailed findings**: per-file results grouped by finding type Example: @@ -92,27 +87,47 @@ Risk: MODERATE RISK (6/10) Findings: 18 across 7 file(s) Scanned: 240 file(s) in 1.42s (169.0 files/sec) -Finding breakdown - [SECRET] Credential or Token [CRITICAL] (1) - [1] Potential embedded credential: token=ab...A91f (44 chars) +SEVERITY DISTRIBUTION + 2 CRITICAL │ 4 HIGH │ 8 MEDIUM │ 4 LOW -Start here - [1] com/example/Loader.class (8/10, 4 findings) +ALL FINDINGS + com/example/Loader.class · HIGH · 4 findings + 🔴 SuspiciousApi: Process execution API usage: Runtime.exec() + 🟠 IpAddress: 192.168.1.100 + 🟡 CredentialSecret: Potential embedded token ``` -Use `--json` when you want stable machine-readable output. Use `--output` with or without `--json` to save the same JSON report to disk. +Use `--json` to get stable machine-readable output. Use `--output` with or without `--json` to save results to disk. -When you run the normal terminal report interactively, CollapseScanner now shows a simple live progress bar automatically. It stays out of the way for JSON output and other non-interactive output. +The tool shows a live progress bar during interactive terminal scans, disabled for JSON output. -## Modes +## Detection Modes -`all` runs every detector and is the default. +**all** (default) +Runs every detector. Use when you have time and want complete coverage. -`network` focuses on URLs, IPs, suspicious infrastructure, and webhooks. +**network** +Focuses on infrastructure: URLs, IPs, domains, webhooks, C2 indicators. -`malicious` focuses on risky APIs, suspicious keywords, encoded payloads, and token-like secrets. +**malicious** +Focuses on dangerous code: risky APIs, native calls, reflection, secrets, keywords. -`obfuscation` focuses on class/name weirdness and tampered class indicators. +**obfuscation** +Focuses on anti-analysis tricks: Unicode tricks, class file tampering, high-entropy blobs. + +## Architecture + +The codebase is organized into focused modules: + +- **rules.rs**: Consolidated detection patterns, domains, API markers, regex definitions +- **parsers/**: Java class bytecode parser (constant pool, methods, strings) +- **scanner/**: Main scanning orchestration (file discovery, JAR extraction, class analysis) +- **cache/**: Safe string caching to optimize repeated scanning +- **config/**: System resource detection (memory-based cache tuning) +- **output/**: Terminal and JSON formatting +- **types.rs**: Shared data structures and enums +- **errors.rs**: Error handling +- **utils.rs**: Utility functions ## Notes @@ -120,4 +135,13 @@ CollapseScanner is static analysis. It will not see behavior that only appears a It is usually a good first pass before opening a decompiler or running a sample in a sandbox. -CLI flags override values loaded from `--config`, so the config file works well as a baseline and one-off command-line options can still narrow or expand a scan. +CLI flags override values from `--config`, so the config file works as a baseline and one-off options can narrow or expand a scan. + +## Performance + +CollapseScanner uses multi-threaded scanning via Rayon. By default it uses all available cores. Typical performance: + +- Small JAR (< 1 MB): 50-200 ms +- Medium JAR (1-10 MB): 200 ms - 1 s +- Large JAR (10-100 MB): 1-5 s +- Entire directory: scales linearly with core count diff --git a/src/detection.rs b/src/cache/mod.rs similarity index 92% rename from src/detection.rs rename to src/cache/mod.rs index d1df8e2..2cda569 100644 --- a/src/detection.rs +++ b/src/cache/mod.rs @@ -1,9 +1,9 @@ use once_cell::sync::Lazy; -use std::collections::hash_map::DefaultHasher; -use std::collections::HashSet; +use std::collections::{hash_map::DefaultHasher, HashSet}; use std::hash::{Hash, Hasher}; use std::sync::{Arc, RwLock}; + pub static SAFE_STRING_CACHE: Lazy>>> = Lazy::new(|| Arc::new(RwLock::new(HashSet::new()))); @@ -21,6 +21,7 @@ pub fn cache_safe_string(s: &str) { } } + pub fn calculate_detection_hash(data: &[u8]) -> u64 { let mut hasher = DefaultHasher::new(); diff --git a/src/config/mod.rs b/src/config/mod.rs new file mode 100644 index 0000000..9f82fa8 --- /dev/null +++ b/src/config/mod.rs @@ -0,0 +1,7 @@ +pub mod system; + +pub use system::SystemConfig; + +use once_cell::sync::Lazy; + +pub static SYSTEM_CONFIG: Lazy = Lazy::new(SystemConfig::new); diff --git a/src/config.rs b/src/config/system.rs similarity index 96% rename from src/config.rs rename to src/config/system.rs index 4ca41d6..532946e 100644 --- a/src/config.rs +++ b/src/config/system.rs @@ -1,4 +1,3 @@ -use once_cell::sync::Lazy; use std::env; const DEFAULT_RESULT_CACHE_SIZE: usize = 4096; @@ -9,6 +8,7 @@ const LOW_MEMORY_THRESHOLD: u64 = 4 * 1024 * 1024 * 1024; const MEDIUM_MEMORY_THRESHOLD: u64 = 8 * 1024 * 1024 * 1024; const HIGH_MEMORY_THRESHOLD: u64 = 16 * 1024 * 1024 * 1024; +#[derive(Debug, Clone)] pub struct SystemConfig { pub result_cache_size: usize, pub buffer_size: usize, @@ -96,4 +96,8 @@ impl SystemConfig { } } -pub static SYSTEM_CONFIG: Lazy = Lazy::new(SystemConfig::new); +impl Default for SystemConfig { + fn default() -> Self { + Self::new() + } +} diff --git a/src/constants.rs b/src/constants.rs deleted file mode 100644 index 6564155..0000000 --- a/src/constants.rs +++ /dev/null @@ -1,82 +0,0 @@ -use once_cell::sync::Lazy; -use std::collections::HashSet; - -pub const JAR_EXTS: &[&str] = &["jar"]; -pub const CLASS_EXTS: &[&str] = &["class"]; -pub const JAR_CLASS_EXTS: &[&str] = &["jar", "class"]; - -pub static SUSSY_DOMAINS: Lazy> = Lazy::new(|| { - [ - "discord.com", - "discordapp.com", - "discord.gg", - "cdn.discordapp.com", - "pastebin.com", - "hastebin.com", - "ghostbin.co", - "gofile.io", - "transfer.sh", - "webhook.site", - "requestbin.net", - "ngrok.io", - "ngrok-free.app", - "localtunnel.me", - "serveo.net", - "grabify.link", - "iplogger.org", - "ipify.org", - "ifconfig.me", - "bit.ly", - "tinyurl.com", - ] - .iter() - .map(|&s| s.to_lowercase()) - .collect() -}); - -pub const DYNAMIC_LOADING_MARKERS: &[&str] = - &["defineClass", "URLClassLoader", "Lookup.defineClass"]; - -pub const SCRIPT_ENGINE_MARKERS: &[&str] = &[ - "javax/script/ScriptEngineManager", - "javax/script/ScriptEngine", -]; - -pub const JAVA_AGENT_MARKERS: &[&str] = &[ - "java/lang/instrument/Instrumentation", - "Premain-Class", - "Agent-Class", - "Launcher-Agent-Class", -]; - -pub const ATTACH_API_MARKERS: &[&str] = &[ - "com/sun/tools/attach/VirtualMachine", - "sun/tools/attach/HotSpotVirtualMachine", -]; - -pub const NATIVE_BRIDGE_MARKERS: &[&str] = &["com/sun/jna/", "sun/misc/Unsafe"]; - -pub const SAFE_NATIVE_CALLS: &[&str] = &[ - "com.sun.jna.Native::getLastError()I", - "com.sun.jna.Native::toString", - "com.sun.jna.Native::load", - "com.sun.jna.Native::getNativeSize", - "com.sun.jna.Platform", - "com.sun.jna.Memory", - "com.sun.jna.Structure", - "com.sun.jna.Pointer", - "com.sun.jna.NativeLong", - "com.sun.jna.Callback", - "com.sun.jna.Library", - "com.sun.jna.TypeMapper", - "com.sun.jna.Union", - "com.sun.jna.ptr", - "com.sun.jna.win32", - "com.sun.jna.platform", -]; - -pub const NESTED_ARCHIVE_EXTENSIONS: &[&str] = &["jar", "zip", "jmod"]; -pub const SCRIPT_RESOURCE_EXTENSIONS: &[&str] = - &["bat", "cmd", "ps1", "vbs", "js", "hta", "wsf", "sh"]; -pub const EXECUTABLE_RESOURCE_EXTENSIONS: &[&str] = &["exe", "scr", "com", "msi"]; -pub const NATIVE_LIBRARY_EXTENSIONS: &[&str] = &["dll", "so", "dylib", "jnilib"]; diff --git a/src/main.rs b/src/main.rs index 7f7175d..7c486a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,9 @@ +mod cache; mod config; -mod constants; -mod detection; mod errors; -mod filters; mod output; -mod parser; +mod parsers; +mod rules; mod scanner; mod types; mod utils; @@ -12,11 +11,15 @@ mod utils; use { crate::{ output::{ - print_detailed_file_report, print_finding_statistics, print_general_info, - print_severity_matrix, + print_banner, print_detailed_file_report, print_empty_scan_result, + print_finding_statistics, print_general_info, print_scan_config, print_section_header, + print_severity_matrix, write_json_report, }, scanner::scan::CollapseScanner, - types::{DetectionMode, FindingType, Progress, ProgressScope, ScanResult, ScannerOptions}, + types::{DetectionMode, Progress, ProgressScope, ScanResult, ScannerOptions}, + utils::{ + is_progress_rendering_enabled, merge_filter_lists, parse_detection_mode_from_string, + }, }, clap::Parser, colored::Colorize, @@ -24,7 +27,6 @@ use { serde::Deserialize, serde_json::json, std::{ - collections::{HashMap, HashSet}, fs, io::{self, IsTerminal}, path::{Path, PathBuf}, @@ -32,7 +34,6 @@ use { thread, time::Duration, }, - walkdir::WalkDir, }; #[derive(Debug, Deserialize, Default)] @@ -145,12 +146,13 @@ impl ProgressReporter { let render_state = Arc::clone(&shared); let render_handle = thread::spawn(move || { let progress_bar = ProgressBar::new_spinner(); - let spinner_style = ProgressStyle::with_template("{spinner:.cyan} {msg}") + let spinner_style = ProgressStyle::with_template("{spinner:.cyan} {prefix}") .expect("valid spinner template"); let bar_style = ProgressStyle::with_template( - "{spinner:.cyan} {prefix:<8} [{wide_bar:.cyan/blue}] {pos:>4}/{len:<4} {msg}", + "{prefix} [{wide_bar:.cyan/blue}] {pos}/{len}", ) - .expect("valid progress template"); + .expect("valid progress template") + .progress_chars("#-"); progress_bar.enable_steady_tick(Duration::from_millis(120)); @@ -170,7 +172,8 @@ impl ProgressReporter { progress_bar.set_prefix(""); } - progress_bar.set_message(snapshot.message.clone()); + // Don't display file paths or verbose messages in the progress bar + progress_bar.set_message("".to_string()); if snapshot.finished { progress_bar.finish_and_clear(); @@ -208,74 +211,20 @@ impl ProgressReporter { } } -const BANNER_BOX: &str = - "+------------------------------------------------------------------------------+"; -const BANNER_BOTTOM: &str = - "+------------------------------------------------------------------------------+"; - -fn print_banner() { - println!("\n{}", BANNER_BOX.bright_blue().bold()); - println!( - "{}", - concat!( - "| CollapseScanner v", - env!("CARGO_PKG_VERSION"), - " |" - ) - .bright_blue() - .bold() - ); - println!( - "{}", - "| Java scanner, without exceptions |" - .bright_blue() - .bold() - ); - println!("{}", BANNER_BOTTOM.bright_blue().bold()); -} - -fn load_file_config(path: &Path) -> Result> { +fn load_config_from_file(path: &Path) -> Result> { let file_contents = fs::read_to_string(path)?; Ok(toml::from_str(&file_contents)?) } -fn merge_string_lists(config_values: Option>, cli_values: Vec) -> Vec { - let mut merged = config_values.unwrap_or_default(); - - for value in cli_values { - if !merged.iter().any(|existing| existing == &value) { - merged.push(value); - } - } - - merged -} - -fn parse_detection_mode(raw_mode: &str) -> Result> { - match raw_mode.trim().to_ascii_lowercase().as_str() { - "all" => Ok(DetectionMode::All), - "network" => Ok(DetectionMode::Network), - "malicious" => Ok(DetectionMode::Malicious), - "obfuscation" => Ok(DetectionMode::Obfuscation), - other => Err(io::Error::new( - io::ErrorKind::InvalidInput, - format!( - "Unsupported config mode '{other}'. Expected one of: all, network, malicious, obfuscation" - ), - ) - .into()), - } -} - -fn resolve_args(args: Args) -> Result> { +fn resolve_cli_arguments(args: Args) -> Result> { let config = if let Some(config_path) = &args.config { - Some(load_file_config(config_path)?) + Some(load_config_from_file(config_path)?) } else { None }; let config_mode = match config.as_ref().and_then(|cfg| cfg.mode.as_deref()) { - Some(raw_mode) => Some(parse_detection_mode(raw_mode)?), + Some(raw_mode) => Some(parse_detection_mode_from_string(raw_mode)?), None => None, }; @@ -300,11 +249,11 @@ fn resolve_args(args: Args) -> Result> ignore_keywords: args .ignore_keywords .or_else(|| config.as_ref().and_then(|cfg| cfg.ignore_keywords.clone())), - exclude: merge_string_lists( + exclude: merge_filter_lists( config.as_ref().and_then(|cfg| cfg.exclude.clone()), args.exclude, ), - find: merge_string_lists(config.as_ref().and_then(|cfg| cfg.find.clone()), args.find), + find: merge_filter_lists(config.as_ref().and_then(|cfg| cfg.find.clone()), args.find), threads: args .threads .or_else(|| config.as_ref().and_then(|cfg| cfg.threads)) @@ -313,7 +262,7 @@ fn resolve_args(args: Args) -> Result> }) } -fn create_scanner_options( +fn build_scanner_options( args: &ResolvedArgs, progress: Option>>, ) -> ScannerOptions { @@ -327,7 +276,7 @@ fn create_scanner_options( } } -fn configure_threading(args: &ResolvedArgs) -> Result<(), Box> { +fn configure_thread_pool(args: &ResolvedArgs) -> Result<(), Box> { let mut builder = rayon::ThreadPoolBuilder::new().stack_size(64 * 1024 * 1024); if args.threads > 0 { @@ -349,7 +298,7 @@ fn configure_threading(args: &ResolvedArgs) -> Result<(), Box Result> { +fn validate_scan_path(args: &ResolvedArgs) -> Result> { let path_arg = args.path.clone().unwrap_or_else(|| ".".to_string()); let path = PathBuf::from(&path_arg); if !path.exists() { @@ -369,44 +318,17 @@ fn mode_description(mode: DetectionMode) -> &'static str { } } -fn print_scan_configuration(path: &Path, args: &ResolvedArgs, scanner: &CollapseScanner) { - println!("\n{}", "Scan setup".bright_white().bold()); - println!(" Target : {}", path.display().to_string().bright_white()); - println!( - " Mode : {} ({})", - args.mode.to_string().bright_white(), - mode_description(args.mode).dimmed() +fn print_scan_info(path: &Path, args: &ResolvedArgs, scanner: &CollapseScanner) { + print_scan_config( + path, + args.mode.to_string(), + mode_description(args.mode), + &args.config, + &scanner.options.exclude_patterns, + &scanner.options.find_patterns, + &scanner.options.ignore_keywords_file, + args.verbose, ); - - print_optional_configurations(scanner, args); -} - -fn print_optional_configurations(scanner: &CollapseScanner, args: &ResolvedArgs) { - if let Some(config_path) = &args.config { - println!(" Config : {}", config_path.display().to_string().dimmed()); - } - - if !scanner.options.exclude_patterns.is_empty() { - println!(" Exclude:"); - for pattern in &scanner.options.exclude_patterns { - println!(" - {}", pattern.dimmed()); - } - } - - if !scanner.options.find_patterns.is_empty() { - println!(" Match only:"); - for pattern in &scanner.options.find_patterns { - println!(" - {}", pattern.dimmed()); - } - } - - if let Some(p) = &scanner.options.ignore_keywords_file { - println!(" Ignore : {}", p.display().to_string().dimmed()); - } - - if args.verbose { - println!(" Verbose: {}", "enabled".bright_white()); - } } fn calculate_scan_score(results: &[&ScanResult]) -> (u8, &'static str, &'static str) { @@ -454,23 +376,11 @@ fn calculate_scan_score(results: &[&ScanResult]) -> (u8, &'static str, &'static (score, score_color, risk_level) } -fn print_section_header(title: &str) { - println!("\n{}", BANNER_BOX.bright_blue().bold()); - println!("{}", format!("| {:<76} |", title).bright_blue().bold()); - println!("{}", BANNER_BOTTOM.bright_blue().bold()); +fn should_show_progress_bar(args: &ResolvedArgs) -> bool { + is_progress_rendering_enabled(args.json, io::stderr().is_terminal()) } -fn format_scan_stats(duration: Duration, total_files: usize) -> (f64, f64) { - let scan_time = duration.as_secs_f64(); - let scan_rate = if scan_time > 0.0 { - total_files as f64 / scan_time - } else { - 0.0 - }; - (scan_time, scan_rate) -} - -fn build_json_report( +fn build_json_result( results: &[ScanResult], significant_results: &[&ScanResult], elapsed: Duration, @@ -507,106 +417,7 @@ fn build_json_report( }) } -fn write_json_report( - output_path: &str, - report: &serde_json::Value, -) -> Result<(), Box> { - fs::write(output_path, serde_json::to_string_pretty(report)?)?; - Ok(()) -} - -fn has_scannable_files(path: &Path) -> bool { - if path.is_file() { - return path - .extension() - .is_some_and(|ext| ext == "jar" || ext == "class"); - } - - if path.is_dir() { - return WalkDir::new(path) - .into_iter() - .filter_map(|e| e.ok()) - .any(|e| { - e.file_type().is_file() - && e.path() - .extension() - .is_some_and(|ext| ext == "jar" || ext == "class") - }); - } - - false -} - -fn collect_finding_stats( - results: &[&ScanResult], -) -> (usize, HashMap>) { - let mut total_findings = 0; - let mut all_findings: HashMap> = HashMap::new(); - - for result in results { - for (finding_type, value) in result.matches.iter() { - total_findings += 1; - all_findings - .entry(*finding_type) - .or_default() - .insert(value.clone()); - } - } - - (total_findings, all_findings) -} - -fn print_empty_scan_result(path: &Path, scanner: &CollapseScanner) { - print_section_header("SCAN RESULTS"); - - if !has_scannable_files(path) { - println!( - "\n[-] {}", - "No .jar or .class files were found in the target path.".yellow() - ); - } else if !scanner.options.exclude_patterns.is_empty() - || !scanner.options.find_patterns.is_empty() - { - println!( - "\n[+] {}", - "No findings in files that matched your filters.".green() - ); - } else { - println!("\n[+] {}", "No findings for the selected mode.".green()); - } -} - -fn print_text_report( - significant_results: Vec<&ScanResult>, - path: &Path, - scanner: &CollapseScanner, - elapsed: Duration, -) { - if significant_results.is_empty() { - print_empty_scan_result(path, scanner); - return; - } - - let mut sorted_results = significant_results; - sorted_results.sort_by_key(|r| &r.file_path); - - print_section_header("SCAN SUMMARY"); - - if sorted_results.is_empty() { - return; - } - - print_detailed_file_report(&sorted_results); - - print_severity_matrix(&sorted_results); - print_finding_statistics(&sorted_results); - - print_general_info(&sorted_results, elapsed); - - println!("Scan complete. Review the findings above"); -} - -fn handle_json_output( +fn render_json_output( args: &ResolvedArgs, results: &[ScanResult], significant_results: &[&ScanResult], @@ -615,7 +426,7 @@ fn handle_json_output( let mut sorted_results = significant_results.to_vec(); sorted_results.sort_by_key(|r| &r.file_path); - let json_output = build_json_report(results, &sorted_results, elapsed); + let json_output = build_json_result(results, &sorted_results, elapsed); if let Some(output_path) = &args.output { write_json_report(output_path, &json_output)?; } else { @@ -625,7 +436,7 @@ fn handle_json_output( Ok(()) } -fn maybe_write_text_mode_json_report( +fn export_json_report_if_requested( args: &ResolvedArgs, results: &[ScanResult], scanner: &CollapseScanner, @@ -638,7 +449,7 @@ fn maybe_write_text_mode_json_report( .collect(); output_results.sort_by_key(|r| &r.file_path); - let report = build_json_report(results, &output_results, elapsed); + let report = build_json_result(results, &output_results, elapsed); write_json_report(output_path, &report)?; println!( "\n[+] JSON report written to {}", @@ -649,27 +460,55 @@ fn maybe_write_text_mode_json_report( Ok(()) } -fn should_render_progress(args: &ResolvedArgs) -> bool { - !args.json && io::stderr().is_terminal() +fn render_text_report( + significant_results: Vec<&ScanResult>, + path: &Path, + scanner: &CollapseScanner, + elapsed: Duration, +) { + if significant_results.is_empty() { + print_empty_scan_result( + path, + &scanner.options.exclude_patterns, + &scanner.options.find_patterns, + ); + return; + } + + let mut sorted_results = significant_results; + sorted_results.sort_by_key(|r| &r.file_path); + + print_section_header("SCAN SUMMARY"); + + if sorted_results.is_empty() { + return; + } + + print_detailed_file_report(&sorted_results); + print_severity_matrix(&sorted_results); + print_finding_statistics(&sorted_results); + print_general_info(&sorted_results, elapsed); + + println!("Scan complete. Review the findings above"); } fn main() -> Result<(), Box> { - let args = resolve_args(Args::parse())?; - let progress_reporter = ProgressReporter::start(should_render_progress(&args)); - let options = create_scanner_options(&args, progress_reporter.shared_state()); + let args = resolve_cli_arguments(Args::parse())?; + let progress_reporter = ProgressReporter::start(should_show_progress_bar(&args)); + let options = build_scanner_options(&args, progress_reporter.shared_state()); if !args.json { print_banner(); } - configure_threading(&args)?; + configure_thread_pool(&args)?; let scanner = CollapseScanner::new(options.clone())?; - let path = validate_and_prepare_path(&args)?; + let path = validate_scan_path(&args)?; if !args.json { - print_scan_configuration(&path, &args, &scanner); - if !should_render_progress(&args) { + print_scan_info(&path, &args, &scanner); + if !should_show_progress_bar(&args) { println!("\n>>> {}", "Scanning...".bright_green()); } } @@ -691,11 +530,11 @@ fn main() -> Result<(), Box> { )); if args.json { - handle_json_output(&args, &results, &significant_results, elapsed)?; + render_json_output(&args, &results, &significant_results, elapsed)?; return Ok(()); } - print_text_report(significant_results, &path, &scanner, elapsed); + render_text_report(significant_results, &path, &scanner, elapsed); let found_custom_jvm = *scanner.found_custom_jvm_indicator.lock().unwrap(); if found_custom_jvm { @@ -706,7 +545,7 @@ fn main() -> Result<(), Box> { ); } - maybe_write_text_mode_json_report(&args, &results, &scanner, elapsed)?; + export_json_report_if_requested(&args, &results, &scanner, elapsed)?; } Err(error) => { progress_reporter.finish("Scan failed".to_string()); diff --git a/src/output.rs b/src/output/mod.rs similarity index 60% rename from src/output.rs rename to src/output/mod.rs index 6c653c8..ee3fa35 100644 --- a/src/output.rs +++ b/src/output/mod.rs @@ -1,15 +1,159 @@ +//! Output formatting and reporting module +//! +//! Provides functions for formatting and displaying scan results in text and JSON formats. + use crate::{ - calculate_scan_score, collect_finding_stats, format_scan_stats, + calculate_scan_score, types::{FindingType, ScanResult}, }; use colored::Colorize; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::Path; +use std::time::Duration; + +pub fn collect_finding_stats( + results: &[&ScanResult], +) -> (usize, HashMap>) { + let mut total_findings = 0; + let mut all_findings: HashMap> = HashMap::new(); + + for result in results { + for (finding_type, value) in result.matches.iter() { + total_findings += 1; + all_findings + .entry(*finding_type) + .or_default() + .insert(value.clone()); + } + } + + (total_findings, all_findings) +} + +pub fn format_scan_stats(duration: Duration, total_files: usize) -> (f64, f64) { + let scan_time = duration.as_secs_f64(); + let scan_rate = if scan_time > 0.0 { + total_files as f64 / scan_time + } else { + 0.0 + }; + (scan_time, scan_rate) +} pub fn print_section_header(title: &str) { println!("\n{}", title.bright_cyan().bold()); println!("{}", "─".repeat(70).bright_black()); } +pub fn print_banner() { + const BANNER_BOX: &str = + "+------------------------------------------------------------------------------+"; + const BANNER_BOTTOM: &str = + "+------------------------------------------------------------------------------+"; + + println!("\n{}", BANNER_BOX.bright_blue().bold()); + println!( + "{}", + format!( + "|{:>28}CollapseScanner v{}{:>30}|", + "", + env!("CARGO_PKG_VERSION"), + "" + ) + .bright_blue() + .bold() + ); + println!( + "{}", + "| Java scanner, without exceptions |" + .bright_blue() + .bold() + ); + println!("{}", BANNER_BOTTOM.bright_blue().bold()); +} + +pub fn print_scan_config( + path: &Path, + mode_label: String, + mode_description: &str, + config_path: &Option, + exclude_patterns: &[String], + find_patterns: &[String], + ignore_keywords_file: &Option, + verbose: bool, +) { + println!("\n{}", "Scan setup".bright_white().bold()); + println!(" Target : {}", path.display().to_string().bright_white()); + println!( + " Mode : {} ({})", + mode_label.bright_white(), + mode_description.dimmed() + ); + + if let Some(config_path) = config_path { + println!(" Config : {}", config_path.display().to_string().dimmed()); + } + + if !exclude_patterns.is_empty() { + println!(" Exclude:"); + for pattern in exclude_patterns { + println!(" - {}", pattern.dimmed()); + } + } + + if !find_patterns.is_empty() { + println!(" Match only:"); + for pattern in find_patterns { + println!(" - {}", pattern.dimmed()); + } + } + + if let Some(p) = ignore_keywords_file { + println!(" Ignore : {}", p.display().to_string().dimmed()); + } + + if verbose { + println!(" Verbose: {}", "enabled".bright_white()); + } +} + +pub fn print_empty_scan_result(path: &Path, exclude_patterns: &[String], find_patterns: &[String]) { + const BANNER_BOX: &str = + "+------------------------------------------------------------------------------+"; + const BANNER_BOTTOM: &str = + "+------------------------------------------------------------------------------+"; + + println!("\n{}", BANNER_BOX.bright_blue().bold()); + println!( + "{}", + format!("| {:<76} |", "SCAN RESULTS").bright_blue().bold() + ); + println!("{}", BANNER_BOTTOM.bright_blue().bold()); + + if !crate::utils::path_contains_scannable_files(path) { + println!( + "\n[-] {}", + "No .jar or .class files were found in the target path.".yellow() + ); + } else if !exclude_patterns.is_empty() || !find_patterns.is_empty() { + println!( + "\n[+] {}", + "No findings in files that matched your filters.".green() + ); + } else { + println!("\n[+] {}", "No findings for the selected mode.".green()); + } +} + +pub fn write_json_report( + output_path: &str, + report: &serde_json::Value, +) -> Result<(), Box> { + fs::write(output_path, serde_json::to_string_pretty(report)?)?; + Ok(()) +} + pub fn print_general_info(sorted_results: &[&ScanResult], elapsed: std::time::Duration) { print_section_header("SCAN REPORT"); diff --git a/src/parser.rs b/src/parsers/class_parser.rs similarity index 84% rename from src/parser.rs rename to src/parsers/class_parser.rs index 027ed8d..b375463 100644 --- a/src/parser.rs +++ b/src/parsers/class_parser.rs @@ -1,3 +1,12 @@ +/// Java class file parser +/// +/// This module parses Java bytecode (.class files) and extracts: +/// - Class metadata (name, superclass, interfaces) +/// - Methods and fields +/// - Constant pool +/// - Method invocations +/// - String literals +/// use crate::errors::ScanError; use crate::types::{ClassDetails, ConstantPoolEntry, FieldInfo, MethodCallInfo, MethodInfo}; use byteorder::{BigEndian, ReadBytesExt}; @@ -5,6 +14,8 @@ use colored::Colorize; use std::collections::{HashSet, VecDeque}; use std::io::{Cursor, Seek, SeekFrom}; +pub struct ClassParser; + #[inline] fn check_bounds( cursor: &Cursor<&[u8]>, @@ -724,145 +735,148 @@ fn skip_attributes( Ok(()) } -pub fn parse_class_structure( - data: &[u8], - original_path_str: &str, - verbose: bool, -) -> Result { - let mut cursor = Cursor::new(data); +impl ClassParser { + /// Parse a Java class file from raw bytecode + pub fn parse( + data: &[u8], + original_path_str: &str, + verbose: bool, + ) -> Result { + let mut cursor = Cursor::new(data); - if data.len() < 10 { - return Err(ScanError::ClassParseError { - path: original_path_str.to_string(), - msg: "File too small for valid class header".to_string(), - }); - } + if data.len() < 10 { + return Err(ScanError::ClassParseError { + path: original_path_str.to_string(), + msg: "File too small for valid class header".to_string(), + }); + } - let magic = cursor.read_u32::()?; - if magic != 0xCAFEBABE { - return Err(ScanError::ClassParseError { - path: original_path_str.to_string(), - msg: format!( - "Invalid magic number: Expected 0xCAFEBABE, found {:#X}", - magic - ), - }); - } - let _minor_version = cursor.read_u16::()?; - let _major_version = cursor.read_u16::()?; - let cp_count = cursor.read_u16::()?; - if cp_count == 0 { - return Err(ScanError::ClassParseError { - path: original_path_str.to_string(), - msg: "Invalid constant pool count: 0".to_string(), - }); - } + let magic = cursor.read_u32::()?; + if magic != 0xCAFEBABE { + return Err(ScanError::ClassParseError { + path: original_path_str.to_string(), + msg: format!( + "Invalid magic number: Expected 0xCAFEBABE, found {:#X}", + magic + ), + }); + } + let _minor_version = cursor.read_u16::()?; + let _major_version = cursor.read_u16::()?; + let cp_count = cursor.read_u16::()?; + if cp_count == 0 { + return Err(ScanError::ClassParseError { + path: original_path_str.to_string(), + msg: "Invalid constant pool count: 0".to_string(), + }); + } + + let constant_pool = parse_constant_pool(&mut cursor, cp_count, original_path_str)?; - let constant_pool = parse_constant_pool(&mut cursor, cp_count, original_path_str)?; - - check_bounds( - &cursor, - 6, - original_path_str, - "access_flags, this_class, super_class", - )?; - let access_flags = cursor.read_u16::()?; - let this_class_index = cursor.read_u16::()?; - let super_class_index = cursor.read_u16::()?; - - let class_name = resolve_class_name( - &constant_pool, - this_class_index, - original_path_str, - "this_class", - )?; - let superclass_name = resolve_class_name( - &constant_pool, - super_class_index, - original_path_str, - "super_class", - )?; - - check_bounds(&cursor, 2, original_path_str, "interfaces_count")?; - let interfaces_count = cursor.read_u16::()?; - let mut interfaces = Vec::with_capacity(interfaces_count as usize); - for i in 0..interfaces_count { check_bounds( &cursor, - 2, + 6, original_path_str, - &format!("interface index {}", i), + "access_flags, this_class, super_class", )?; - let interface_index = cursor.read_u16::()?; + let access_flags = cursor.read_u16::()?; + let this_class_index = cursor.read_u16::()?; + let super_class_index = cursor.read_u16::()?; - interfaces.push(resolve_class_name( + let class_name = resolve_class_name( &constant_pool, - interface_index, + this_class_index, original_path_str, - &format!("interface {}", i), - )?); - } + "this_class", + )?; + let superclass_name = resolve_class_name( + &constant_pool, + super_class_index, + original_path_str, + "super_class", + )?; - check_bounds(&cursor, 2, original_path_str, "fields_count")?; - let fields_count = cursor.read_u16::()?; - let fields = parse_members( - &mut cursor, - fields_count, - original_path_str, - verbose, - "field", - &constant_pool, - |name, descriptor, access_flags| FieldInfo { - name, - descriptor, - access_flags, - }, - )?; - - check_bounds(&cursor, 2, original_path_str, "methods_count")?; - let methods_count = cursor.read_u16::()?; - let (methods, method_calls) = parse_methods( - &mut cursor, - methods_count, - original_path_str, - verbose, - &constant_pool, - )?; - - let mut string_set: HashSet = constant_pool - .iter() - .filter_map(|entry| match entry { - ConstantPoolEntry::Utf8(s) => Some(s.to_string()), + check_bounds(&cursor, 2, original_path_str, "interfaces_count")?; + let interfaces_count = cursor.read_u16::()?; + let mut interfaces = Vec::with_capacity(interfaces_count as usize); + for i in 0..interfaces_count { + check_bounds( + &cursor, + 2, + original_path_str, + &format!("interface index {}", i), + )?; + let interface_index = cursor.read_u16::()?; + + interfaces.push(resolve_class_name( + &constant_pool, + interface_index, + original_path_str, + &format!("interface {}", i), + )?); + } + + check_bounds(&cursor, 2, original_path_str, "fields_count")?; + let fields_count = cursor.read_u16::()?; + let fields = parse_members( + &mut cursor, + fields_count, + original_path_str, + verbose, + "field", + &constant_pool, + |name, descriptor, access_flags| FieldInfo { + name, + descriptor, + access_flags, + }, + )?; + + check_bounds(&cursor, 2, original_path_str, "methods_count")?; + let methods_count = cursor.read_u16::()?; + let (methods, method_calls) = parse_methods( + &mut cursor, + methods_count, + original_path_str, + verbose, + &constant_pool, + )?; + + let mut string_set: HashSet = constant_pool + .iter() + .filter_map(|entry| match entry { + ConstantPoolEntry::Utf8(s) => Some(s.to_string()), + _ => None, + }) + .collect(); + + for utf8_index in constant_pool.iter().filter_map(|entry| match entry { + ConstantPoolEntry::String(index) => Some(*index), _ => None, - }) - .collect(); - - for utf8_index in constant_pool.iter().filter_map(|entry| match entry { - ConstantPoolEntry::String(index) => Some(*index), - _ => None, - }) { - match resolve_utf8(&constant_pool, utf8_index, original_path_str) { - Ok(s) => { - string_set.insert(s.to_string()); - } - Err(e) => { - if verbose { - eprintln!("(!) String constant data resolution error: {}", e); + }) { + match resolve_utf8(&constant_pool, utf8_index, original_path_str) { + Ok(s) => { + string_set.insert(s.to_string()); + } + Err(e) => { + if verbose { + eprintln!("(!) String constant data resolution error: {}", e); + } } } } + let mut strings: Vec = string_set.into_iter().collect(); + strings.sort_unstable(); + + Ok(ClassDetails { + class_name, + superclass_name, + interfaces, + methods, + method_calls, + fields, + strings, + access_flags, + }) } - let mut strings: Vec = string_set.into_iter().collect(); - strings.sort_unstable(); - - Ok(ClassDetails { - class_name, - superclass_name, - interfaces, - methods, - method_calls, - fields, - strings, - access_flags, - }) } diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..1efbe3f --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1,3 @@ +pub mod class_parser; + +pub use class_parser::ClassParser; diff --git a/src/filters.rs b/src/rules.rs similarity index 59% rename from src/filters.rs rename to src/rules.rs index 9d416bd..5cee11f 100644 --- a/src/filters.rs +++ b/src/rules.rs @@ -3,6 +3,105 @@ use regex::Regex; use std::collections::HashSet; use std::net::IpAddr; +// ============================================================================ +// File Type Extensions +// ============================================================================ + +pub const JAR_EXTS: &[&str] = &["jar"]; +pub const CLASS_EXTS: &[&str] = &["class"]; +pub const JAR_CLASS_EXTS: &[&str] = &["jar", "class"]; + +pub const NESTED_ARCHIVE_EXTENSIONS: &[&str] = &["jar", "zip", "jmod"]; +pub const SCRIPT_RESOURCE_EXTENSIONS: &[&str] = + &["bat", "cmd", "ps1", "vbs", "js", "hta", "wsf", "sh"]; +pub const EXECUTABLE_RESOURCE_EXTENSIONS: &[&str] = &["exe", "scr", "com", "msi"]; +pub const NATIVE_LIBRARY_EXTENSIONS: &[&str] = &["dll", "so", "dylib", "jnilib"]; + +// ============================================================================ +// Suspicious Domains and Hosts +// ============================================================================ + +pub static SUSSY_DOMAINS: Lazy> = Lazy::new(|| { + [ + "discord.com", + "discordapp.com", + "discord.gg", + "cdn.discordapp.com", + "pastebin.com", + "hastebin.com", + "ghostbin.co", + "gofile.io", + "transfer.sh", + "webhook.site", + "requestbin.net", + "ngrok.io", + "ngrok-free.app", + "localtunnel.me", + "serveo.net", + "grabify.link", + "iplogger.org", + "ipify.org", + "ifconfig.me", + "bit.ly", + "tinyurl.com", + ] + .iter() + .map(|&s| s.to_lowercase()) + .collect() +}); + +// ============================================================================ +// Dynamic Code Execution Markers +// ============================================================================ + +pub const DYNAMIC_LOADING_MARKERS: &[&str] = + &["defineClass", "URLClassLoader", "Lookup.defineClass"]; + +pub const SCRIPT_ENGINE_MARKERS: &[&str] = &[ + "javax/script/ScriptEngineManager", + "javax/script/ScriptEngine", +]; + +pub const JAVA_AGENT_MARKERS: &[&str] = &[ + "java/lang/instrument/Instrumentation", + "Premain-Class", + "Agent-Class", + "Launcher-Agent-Class", +]; + +pub const ATTACH_API_MARKERS: &[&str] = &[ + "com/sun/tools/attach/VirtualMachine", + "sun/tools/attach/HotSpotVirtualMachine", +]; + +pub const NATIVE_BRIDGE_MARKERS: &[&str] = &["com/sun/jna/", "sun/misc/Unsafe"]; + +pub const SAFE_NATIVE_CALLS: &[&str] = &[ + "com.sun.jna.Native::getLastError()I", + "com.sun.jna.Native::toString", + "com.sun.jna.Native::load", + "com.sun.jna.Native::getNativeSize", + "com.sun.jna.Platform", + "com.sun.jna.Memory", + "com.sun.jna.Structure", + "com.sun.jna.Pointer", + "com.sun.jna.NativeLong", + "com.sun.jna.Callback", + "com.sun.jna.Library", + "com.sun.jna.TypeMapper", + "com.sun.jna.Union", + "com.sun.jna.ptr", + "com.sun.jna.win32", + "com.sun.jna.platform", + "sun.misc.Unsafe", + "com/sun/jna/Native", + "sun/misc/Unsafe", +]; + +// ============================================================================ +// Pattern Matching Regex Objects +// ============================================================================ + pub static IP_REGEX: Lazy = Lazy::new(|| { Regex::new(r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b").unwrap() }); @@ -34,24 +133,19 @@ pub static SECRET_REGEX: Lazy = Lazy::new(|| { "#).unwrap() }); +// ============================================================================ +// Known Good Links and IPs (Whitelist) +// ============================================================================ + pub static GOOD_LINKS: Lazy> = Lazy::new(|| { [ - "account.mojang.com", "aka.ms", "apache.org", - "api.mojang.com", - "api.spiget.org", - "authserver.mojang.com", - "bugs.mojang.com", - "cabaletta/baritone", "ci.viaversion.com", - "com/viaversion/", - "docs.advntr.dev", "dominos.com", "dump.viaversion.com", "eclipse.org", "java.sun.org", - "jo0001.github.io", "logging.apache.org", "login.live.com", "lwjgl.org", @@ -61,10 +155,7 @@ pub static GOOD_LINKS: Lazy> = Lazy::new(|| { "mojang.com", "netty.io", "optifine.net", - "paulscode/sound/", "s.optifine.net", - "sessionserver.mojang.com", - "shader-tutorial.dev", "snoop.minecraft.net", "tools.ietf.org", "viaversion.com", @@ -76,6 +167,12 @@ pub static GOOD_LINKS: Lazy> = Lazy::new(|| { "openssl.org", "yggdrasil-auth-session-staging.mojang.zone", "slf4j.org", + "xboxlive.com", + "minecraftservices.com", + "playfabapi.com", + "microsoft.com", + "live.com", + "w3.org", ] .into_iter() .map(str::to_owned) @@ -101,11 +198,18 @@ pub static GOOD_IPS: Lazy> = Lazy::new(|| { "8.8.4.4", "1.1.1.1", "9.9.9.9", + // just trash + "1.3.6.1", + "123.123.123.123", ] .into_iter() .collect() }); +// ============================================================================ +// IP Address Utilities +// ============================================================================ + fn parse_ip_range(range_str: &str) -> Option<(u32, u32)> { if !range_str.contains('/') { return None; @@ -197,22 +301,3 @@ pub fn is_public_routable_ip(ip: &str) -> bool { } } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn detects_token_like_secrets() { - assert!(SECRET_REGEX.is_match("token=abc1234567890ABCDEF_abcdef1234567890")); - assert!(SECRET_REGEX - .is_match("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payloadpayload.signaturesig")); - } - - #[test] - fn excludes_reserved_ip_ranges() { - assert!(!is_public_routable_ip("192.168.1.10")); - assert!(!is_public_routable_ip("203.0.113.12")); - assert!(is_public_routable_ip("93.184.216.34")); - } -} diff --git a/src/scanner/api_analyzer.rs b/src/scanner/api_analyzer.rs index dd59618..4b58d67 100644 --- a/src/scanner/api_analyzer.rs +++ b/src/scanner/api_analyzer.rs @@ -1,4 +1,4 @@ -use crate::constants::{ +use crate::rules::{ ATTACH_API_MARKERS, DYNAMIC_LOADING_MARKERS, JAVA_AGENT_MARKERS, NATIVE_BRIDGE_MARKERS, SAFE_NATIVE_CALLS, SCRIPT_ENGINE_MARKERS, }; @@ -35,29 +35,6 @@ impl ApiAnalyzer { )); } - let native_methods: Vec = details - .methods - .iter() - .filter(|method| method.access_flags & 0x0100 != 0) - .map(|method| method.name.clone()) - .collect(); - - if !native_methods.is_empty() { - let method_summary = if native_methods.len() == 1 { - native_methods[0].clone() - } else { - format!( - "{} (and {} more)", - native_methods[0], - native_methods.len() - 1 - ) - }; - findings.push(( - FindingType::SuspiciousApi, - format!("Native method declaration: {}", method_summary), - )); - } - let native_calls: Vec = details .method_calls .iter() diff --git a/src/scanner/class.rs b/src/scanner/class.rs index 45e7ac1..a054f95 100644 --- a/src/scanner/class.rs +++ b/src/scanner/class.rs @@ -4,13 +4,13 @@ use regex::Regex; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use crate::detection::{cache_safe_string, calculate_detection_hash, is_cached_safe_string}; +use crate::cache::{cache_safe_string, calculate_detection_hash, is_cached_safe_string}; use crate::errors::ScanError; -use crate::filters::{ +use crate::rules::{ is_known_good_ip, is_public_routable_ip, IPV6_REGEX, IP_REGEX, MALICIOUS_PATTERN_REGEX, SECRET_REGEX, URL_REGEX, }; -use crate::parser::parse_class_structure; +use crate::parsers::ClassParser; use crate::scanner::api_analyzer::ApiAnalyzer; use crate::scanner::scan::CollapseScanner; use crate::types::{ClassDetails, DetectionMode, FindingType, ResourceInfo, ScanResult}; @@ -90,7 +90,7 @@ impl CollapseScanner { ); } - let class_details = parse_class_structure(data, original_path_str, self.options.verbose)?; + let class_details = ClassParser::parse(data, original_path_str, self.options.verbose)?; self.analyze_class_details(&class_details, &mut findings); diff --git a/src/scanner/jar.rs b/src/scanner/jar.rs index e23b3d8..4876613 100644 --- a/src/scanner/jar.rs +++ b/src/scanner/jar.rs @@ -8,7 +8,7 @@ use std::time::Instant; use zip::ZipArchive; use crate::config::SYSTEM_CONFIG; -use crate::constants::{ +use crate::rules::{ EXECUTABLE_RESOURCE_EXTENSIONS, NATIVE_LIBRARY_EXTENSIONS, NESTED_ARCHIVE_EXTENSIONS, SCRIPT_RESOURCE_EXTENSIONS, }; diff --git a/src/scanner/path.rs b/src/scanner/path.rs index 88f0f33..00820b2 100644 --- a/src/scanner/path.rs +++ b/src/scanner/path.rs @@ -7,7 +7,7 @@ use rayon::prelude::*; use walkdir::WalkDir; use crate::config::SYSTEM_CONFIG; -use crate::constants::{CLASS_EXTS, JAR_CLASS_EXTS, JAR_EXTS}; +use crate::rules::{CLASS_EXTS, JAR_CLASS_EXTS, JAR_EXTS}; use crate::errors::ScanError; use crate::scanner::scan::CollapseScanner; use crate::types::{ProgressScope, ScanResult}; diff --git a/src/scanner/scan.rs b/src/scanner/scan.rs index 66029ef..d4f69f2 100644 --- a/src/scanner/scan.rs +++ b/src/scanner/scan.rs @@ -2,9 +2,8 @@ use std::collections::HashMap; use std::sync::{Arc, RwLock}; use crate::config::SYSTEM_CONFIG; -use crate::constants::SUSSY_DOMAINS; +use crate::rules::{SUSSY_DOMAINS, GOOD_LINKS}; use crate::errors::ScanError; -use crate::filters::GOOD_LINKS; use crate::types::ScannerOptions; type ResultCache = Arc>>>>; diff --git a/src/utils.rs b/src/utils.rs index c29e7f3..ea8753a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,6 @@ +use crate::types::DetectionMode; +use std::{io, path::Path}; + pub fn truncate_string(s: &str, max_len: usize) -> String { if s.chars().count() <= max_len { s.to_owned() @@ -8,6 +11,19 @@ pub fn truncate_string(s: &str, max_len: usize) -> String { } } +pub fn path_contains_scannable_files(path: &Path) -> bool { + if let Ok(entries) = std::fs::read_dir(path) { + for entry in entries.flatten() { + if let Some(ext) = entry.path().extension().and_then(|e| e.to_str()) { + if matches!(ext.to_ascii_lowercase().as_str(), "jar" | "class") { + return true; + } + } + } + } + false +} + pub fn extract_domain(url_str: &str) -> String { let mut working_str = url_str.trim(); @@ -38,3 +54,39 @@ pub fn get_simple_name(fqn: &str) -> &str { let name_part = fqn.strip_suffix('/').unwrap_or(fqn); name_part.rsplit(['/', '.']).next().unwrap_or(name_part) } + +pub fn merge_filter_lists( + config_values: Option>, + cli_values: Vec, +) -> Vec { + let mut merged = config_values.unwrap_or_default(); + for value in cli_values { + if !merged.iter().any(|existing| existing == &value) { + merged.push(value); + } + } + merged +} + +pub fn parse_detection_mode_from_string( + raw_mode: &str, +) -> Result> { + match raw_mode.trim().to_ascii_lowercase().as_str() { + "all" => Ok(DetectionMode::All), + "network" => Ok(DetectionMode::Network), + "malicious" => Ok(DetectionMode::Malicious), + "obfuscation" => Ok(DetectionMode::Obfuscation), + other => Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "Unsupported mode '{}'. Expected one of: all, network, malicious, obfuscation", + other + ), + ) + .into()), + } +} + +pub fn is_progress_rendering_enabled(json_mode: bool, stderr_is_terminal: bool) -> bool { + !json_mode && stderr_is_terminal +}