Skip to content

Commit dcf6d55

Browse files
authored
Add fuzzy filters (#33)
1 parent 365cc01 commit dcf6d55

File tree

20 files changed

+597
-3
lines changed

20 files changed

+597
-3
lines changed

frontend-laminar/src/main/scala/ru/d10xa/jsonlogviewer/ViewElement.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ object ViewElement {
3838
formatIn = config.formatIn,
3939
rawInclude = None,
4040
rawExclude = None,
41+
fuzzyInclude = None,
42+
fuzzyExclude = None,
4143
excludeFields = None,
4244
fieldNames = None,
4345
showEmptyFields = None

json-log-viewer/jvm/src/main/scala/ru/d10xa/jsonlogviewer/decline/yaml/ConfigYamlLoaderImpl.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ class ConfigYamlLoaderImpl extends ConfigYamlLoader {
201201
parseOptionalListString(feedFields, "rawInclude")
202202
val rawExcludeValidated =
203203
parseOptionalListString(feedFields, "rawExclude")
204+
val fuzzyIncludeValidated =
205+
parseOptionalListString(feedFields, "fuzzyInclude")
206+
val fuzzyExcludeValidated =
207+
parseOptionalListString(feedFields, "fuzzyExclude")
204208
val excludeFieldsValidated =
205209
parseOptionalListString(
206210
feedFields,
@@ -218,6 +222,8 @@ class ConfigYamlLoaderImpl extends ConfigYamlLoader {
218222
fieldNamesValidated,
219223
rawIncludeValidated,
220224
rawExcludeValidated,
225+
fuzzyIncludeValidated,
226+
fuzzyExcludeValidated,
221227
excludeFieldsValidated,
222228
showEmptyFieldsValidated
223229
)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package ru.d10xa.jsonlogviewer
2+
3+
import ru.d10xa.jsonlogviewer.config.ResolvedConfig
4+
5+
/** Fuzzy filter that searches for patterns across all field values of a
  * parsed log entry using token-based matching.
  *
  * Unlike rawFilter (regex on raw strings) or SQL filters (exact field
  * matching), fuzzy filter:
  *   - Works on the parse result, falling back to the raw line when parsing
  *     failed
  *   - Searches across the values of all fields (level, message, stackTrace,
  *     custom attributes, etc.); field names are not searched
  *   - Delegates tokenization to [[FuzzyTokenizer]], which lowercases the
  *     text and strips punctuation
  *   - Supports partial token matching (a pattern token matches any text
  *     token that contains it)
  *
  * Example:
  * {{{
  * fuzzyInclude: ["error timeout"]
  * // Will match:
  * //   {"level": "ERROR", "message": "Connection timeout"}
  * //   {"message": "Error: request timeout occurred"}
  * // Will NOT match (only values are searched, not the key "error_code"):
  * //   {"error_code": "500", "details": "timeout"}
  * }}}
  *
  * @param config
  *   Resolved configuration containing fuzzyInclude and fuzzyExclude patterns
  */
class FuzzyFilter(config: ResolvedConfig) {

  /** Token sets of the fuzzyInclude patterns, computed once at construction
    * instead of once per log line.
    */
  private val includeTokenSets: List[Set[String]] =
    searchableTokenSets(config.fuzzyInclude)

  /** Token sets of the fuzzyExclude patterns, computed once at construction
    * instead of once per log line.
    */
  private val excludeTokenSets: List[Set[String]] =
    searchableTokenSets(config.fuzzyExclude)

  /** Tokenizes every configured pattern and drops patterns that yield no
    * tokens.
    *
    * A pattern without tokens (empty string, punctuation only, or tokens the
    * tokenizer discards) would otherwise match every entry, because "all of
    * zero tokens are present" is vacuously true. In fuzzyExclude that would
    * silently drop the whole log, so such patterns are ignored.
    *
    * @param patterns
    *   Optional list of raw patterns from the configuration
    * @return
    *   Non-empty token sets, one per usable pattern
    */
  private def searchableTokenSets(
    patterns: Option[List[String]]
  ): List[Set[String]] =
    patterns
      .getOrElse(Nil)
      .map(FuzzyTokenizer.tokenize)
      .filter(_.nonEmpty)

  /** Collects all values from the parsed log entry into a single searchable
    * string.
    *
    * Includes the standard fields (timestamp, level, message, stackTrace,
    * loggerName, threadName) that are present and all values from
    * otherAttributes.
    *
    * @param parseResult
    *   Parsed log entry
    * @return
    *   Space-separated concatenation of all field values, or the raw line if
    *   the entry was not parsed
    */
  private def collectAllValues(parseResult: ParseResult): String =
    parseResult.parsed match {
      case None => parseResult.raw // Fallback to raw string if parsing failed
      case Some(parsed) =>
        val standardFields = List(
          parsed.timestamp,
          parsed.level,
          parsed.message,
          parsed.stackTrace,
          parsed.loggerName,
          parsed.threadName
        ).flatten

        val otherValues = parsed.otherAttributes.values

        (standardFields ++ otherValues).mkString(" ")
    }

  /** Token-based fuzzy matching: checks that every pattern token occurs in
    * the text.
    *
    * Uses partial matching: pattern token "timeout" will match text tokens
    * "timeout", "timeouts", "timeout_ms", etc.
    *
    * @param textTokens
    *   Tokens of the text to search in
    * @param patternTokens
    *   Tokens of a single search pattern (e.g., "error timeout")
    * @return
    *   true if each pattern token is contained in at least one text token
    */
  private def containsAllTokens(
    textTokens: Set[String],
    patternTokens: Set[String]
  ): Boolean =
    patternTokens.forall { patternToken =>
      textTokens.exists(textToken => textToken.contains(patternToken))
    }

  /** Tests whether the parsed log entry matches fuzzyInclude and fuzzyExclude
    * patterns.
    *
    * Logic:
    *   - fuzzyInclude: At least one pattern must match (OR logic)
    *   - fuzzyExclude: No pattern should match (AND NOT logic)
    *   - Patterns that contain no searchable tokens are ignored
    *   - If no usable fuzzyInclude pattern is configured, all entries pass
    *     the include check
    *
    * @param parseResult
    *   Parsed log entry to test
    * @return
    *   true if entry should be included in output
    */
  def test(parseResult: ParseResult): Boolean =
    if (includeTokenSets.isEmpty && excludeTokenSets.isEmpty) {
      // Nothing configured: skip collecting and tokenizing the entry.
      true
    } else {
      // Tokenize the entry once and reuse the tokens for every pattern.
      val textTokens = FuzzyTokenizer.tokenize(collectAllValues(parseResult))

      val included =
        includeTokenSets.isEmpty ||
          includeTokenSets.exists(containsAllTokens(textTokens, _))

      val excluded =
        excludeTokenSets.exists(containsAllTokens(textTokens, _))

      included && !excluded
    }
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package ru.d10xa.jsonlogviewer
2+
3+
/** Tokenizer for fuzzy search that handles punctuation, quotes, and special
  * characters in log messages.
  *
  * Rules:
  *   - Splits text into runs of letters, digits, dots and underscores; every
  *     other character acts as a separator
  *   - Letters and digits are recognised with `Char.isLetterOrDigit`, so
  *     non-ASCII words (e.g. Cyrillic or accented Latin) are tokenized too
  *   - Keeps dots and underscores inside words (e.g., john.doe, user_id)
  *   - Converts to lowercase for case-insensitive matching
  *   - Drops tokens shorter than 2 characters and tokens that contain no
  *     letter or digit
  *
  * Examples:
  * {{{
  * tokenize("User 'john.doe' timeout")        // Set("user", "john.doe", "timeout")
  * tokenize("ERROR: database.query() failed") // Set("error", "database.query", "failed")
  * tokenize("card_number=1234")               // Set("card_number", "1234")
  * }}}
  */
object FuzzyTokenizer {

  /** Tokens shorter than this are discarded. */
  private val MinTokenLength = 2

  /** Checks whether a character may be part of a token.
    *
    * Uses the same letter/digit test as [[isOnlyPunctuation]] so both steps
    * agree on what counts as a word character; a regex `\w` would only cover
    * ASCII letters and digits.
    *
    * @param c
    *   Character to classify
    * @return
    *   true for letters, digits, '.' and '_'
    */
  private def isTokenChar(c: Char): Boolean =
    c.isLetterOrDigit || c == '.' || c == '_'

  /** Tokenizes text into a set of searchable words.
    *
    * @param text
    *   Text to tokenize
    * @return
    *   Set of normalized tokens (lowercase, minimum 2 characters, at least
    *   one letter or digit)
    */
  def tokenize(text: String): Set[String] =
    text.toLowerCase
      // Turn every separator into a space so a plain split yields the tokens.
      .map(c => if (isTokenChar(c)) c else ' ')
      .split(' ')
      .iterator
      .filter { token =>
        token.length >= MinTokenLength && !isOnlyPunctuation(token)
      }
      .toSet

  /** Checks if a token consists only of non-alphanumeric characters.
    *
    * @param token
    *   Token to check
    * @return
    *   true if token contains no letter or digit (e.g. "..", "__")
    */
  private def isOnlyPunctuation(token: String): Boolean =
    token.forall(c => !c.isLetterOrDigit)
}

json-log-viewer/shared/src/main/scala/ru/d10xa/jsonlogviewer/LogViewerStream.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ object LogViewerStream {
123123
val timestampFilter = TimestampFilter()
124124
val parseResultKeys = ParseResultKeys(resolvedConfig)
125125
val logLineFilter = LogLineFilter(resolvedConfig, parseResultKeys)
126+
val fuzzyFilter = new FuzzyFilter(resolvedConfig)
126127

127128
val outputLineFormatter = resolvedConfig.formatOut match {
128129
case Some(Config.FormatOut.Raw) => RawFormatter()
@@ -142,6 +143,7 @@ object LogViewerStream {
142143
.map(parser.parse)
143144
.filter(logLineFilter.grep)
144145
.filter(logLineFilter.logLineQueryPredicate)
146+
.filter(fuzzyFilter.test)
145147
.through(
146148
timestampFilter.filterTimestampAfter(resolvedConfig.timestampAfter)
147149
)
@@ -165,6 +167,7 @@ object LogViewerStream {
165167
val timestampFilter = TimestampFilter()
166168
val parseResultKeys = ParseResultKeys(resolvedConfig)
167169
val logLineFilter = LogLineFilter(resolvedConfig, parseResultKeys)
170+
val fuzzyFilter = new FuzzyFilter(resolvedConfig)
168171

169172
val outputLineFormatter = resolvedConfig.formatOut match {
170173
case Some(Config.FormatOut.Raw) => RawFormatter()
@@ -183,6 +186,7 @@ object LogViewerStream {
183186
.map(csvHeaderParser.parse)
184187
.filter(logLineFilter.grep)
185188
.filter(logLineFilter.logLineQueryPredicate)
189+
.filter(fuzzyFilter.test)
186190
.through(
187191
timestampFilter.filterTimestampAfter(resolvedConfig.timestampAfter)
188192
)

json-log-viewer/shared/src/main/scala/ru/d10xa/jsonlogviewer/config/ResolvedConfig.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ final case class ResolvedConfig(
3030
// Feed-specific settings
3131
rawInclude: Option[List[String]],
3232
rawExclude: Option[List[String]],
33+
fuzzyInclude: Option[List[String]],
34+
fuzzyExclude: Option[List[String]],
3335
excludeFields: Option[List[String]],
3436

3537
// Timestamp settings
@@ -89,6 +91,8 @@ object ConfigResolver {
8991
fieldNames = feedFieldNames,
9092
rawInclude = feed.rawInclude,
9193
rawExclude = feed.rawExclude,
94+
fuzzyInclude = feed.fuzzyInclude,
95+
fuzzyExclude = feed.fuzzyExclude,
9296
excludeFields = feed.excludeFields,
9397
timestampAfter = config.timestamp.after,
9498
timestampBefore = config.timestamp.before,
@@ -109,6 +113,8 @@ object ConfigResolver {
109113
fieldNames = globalFieldNames,
110114
rawInclude = None,
111115
rawExclude = None,
116+
fuzzyInclude = None,
117+
fuzzyExclude = None,
112118
excludeFields = None,
113119
timestampAfter = config.timestamp.after,
114120
timestampBefore = config.timestamp.before,
@@ -130,6 +136,8 @@ object ConfigResolver {
130136
fieldNames = config.fieldNames,
131137
rawInclude = None,
132138
rawExclude = None,
139+
fuzzyInclude = None,
140+
fuzzyExclude = None,
133141
excludeFields = None,
134142
timestampAfter = config.timestamp.after,
135143
timestampBefore = config.timestamp.before,

json-log-viewer/shared/src/main/scala/ru/d10xa/jsonlogviewer/decline/yaml/Feed.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ case class Feed(
1212
fieldNames: Option[FieldNames],
1313
rawInclude: Option[List[String]],
1414
rawExclude: Option[List[String]],
15+
fuzzyInclude: Option[List[String]],
16+
fuzzyExclude: Option[List[String]],
1517
excludeFields: Option[List[String]],
1618
showEmptyFields: Option[Boolean]
1719
)

0 commit comments

Comments
 (0)