From 726c4dfe58d484a780fea1f71c6e67765c13bc8e Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 21:56:31 +0400 Subject: [PATCH 01/19] Added tests for tag name extraction from tag links & search queries --- src/lib/philomena/tag-utils.ts | 2 +- tests/lib/philomena/tag-utils.spec.ts | 73 +++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tests/lib/philomena/tag-utils.spec.ts diff --git a/src/lib/philomena/tag-utils.ts b/src/lib/philomena/tag-utils.ts index 0890a2c..32a3a24 100644 --- a/src/lib/philomena/tag-utils.ts +++ b/src/lib/philomena/tag-utils.ts @@ -42,7 +42,7 @@ const tagLinkRegExp = /\/tags\/(?[^/?#]+)/; * * @see https://github.com/philomena-dev/philomena/blob/6086757b654da8792ae52adb2a2f501ea6c30d12/lib/philomena/slug.ex#L52-L57 */ -const slugEncodedCharacters: Map = new Map([ +export const slugEncodedCharacters: Map = new Map([ ['-dash-', '-'], ['-fwslash-', '/'], ['-bwslash-', '\\'], diff --git a/tests/lib/philomena/tag-utils.spec.ts b/tests/lib/philomena/tag-utils.spec.ts new file mode 100644 index 0000000..c95d56e --- /dev/null +++ b/tests/lib/philomena/tag-utils.spec.ts @@ -0,0 +1,73 @@ +import { describe, expect, it } from 'vitest'; +import { URL } from 'url'; +import { resolveTagNameFromLink, slugEncodedCharacters } from '$lib/philomena/tag-utils'; + +describe('tag-utils', () => { + const origin = 'https://furbooru.org'; + + describe('resolveTagNameFromLink', () => { + function resolveFromSearchQuery(encodedQuery: string): string | null { + return resolveTagNameFromLink(new URL(`/search?q=${encodedQuery}`, origin)); + } + + describe('Parsing from /search/?q=tag links', () => { + // Test cases for tags separated by commas + it('should resolve a single tag from /search URLs', () => { + expect(resolveFromSearchQuery('safe')).toBe('safe'); + }); + + it('should return null for queries with multiple comma-separated tags', () => { + // Comma acts as a separator in the query, resulting in multiple tokens + expect(resolveFromSearchQuery('safe, suggestive')).toBe(null); + }); + + it('should properly treat parentheses in the query with single tag', () => { + // Parentheses are operators in the query language, but when inside the tag name, they should still be properly + // working. + expect(resolveFromSearchQuery('experiment (casualties unknown)')).toBe('experiment (casualties unknown)'); + }); + + it('should properly resolve queries with encoded characters', () => { + expect(resolveFromSearchQuery('pok%C3%A9mon')).toBe('pokémon'); + }); + + it('should unquote quoted term', () => { + expect(resolveFromSearchQuery('"experiment (casualties unknown)"')).toBe('experiment (casualties unknown)') + expect(resolveFromSearchQuery('"single tag, really"')).toBe('single tag, really'); + }); + }) + + describe('Parsing from /tags/name links', () => { + function resolveFromTagLink(encodedTagName: string): string | null { + return resolveTagNameFromLink(new URL(`/tags/${encodedTagName}`, origin)); + } + + it('should resolve a single tag', () => { + expect(resolveFromTagLink('safe')).toBe('safe'); + }); + + it('should only read the tag page even if query is provided', () => { + expect(resolveFromTagLink('grotesque?q=explicit')).toBe('grotesque'); + }); + + it('should properly resolve links with encoded characters', () => { + expect(resolveFromTagLink('pok%C3%A9mon')).toBe('pokémon'); + }); + + it('should decoded slug-encoded characters', () => { + // More common example where tag is. + expect(resolveFromTagLink(`namespace-colon-tag+name`)).toBe('namespace:tag name'); + + // Testing the whole list of encoded characters. + for (const [encodedCharacter, decodedCharacter] of slugEncodedCharacters.entries()) { + expect(resolveFromTagLink(`test+symbol${encodedCharacter}without+spaces`)).toBe(`test symbol${decodedCharacter}without spaces`); + expect(resolveFromTagLink(`test+symbol+${encodedCharacter}+with+spaces`)).toBe(`test symbol ${decodedCharacter} with spaces`); + } + }); + }); + + it('should return null for unsupported URLs', () => { + expect(resolveTagNameFromLink(new URL('/pages/example', origin))).toBe(null); + }); + }); +}); From f01bfe8ae072a6a174b7c0bcf7c9517b60532e07 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:00:20 +0400 Subject: [PATCH 02/19] =?UTF-8?q?Removed=20erroneous=20`-`=20=E2=86=92=20`?= =?UTF-8?q?=20`=20conversion=20for=20tag=20links?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/philomena/tag-utils.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/philomena/tag-utils.ts b/src/lib/philomena/tag-utils.ts index 32a3a24..0035c2f 100644 --- a/src/lib/philomena/tag-utils.ts +++ b/src/lib/philomena/tag-utils.ts @@ -102,7 +102,6 @@ export function resolveTagNameFromLink(tagLink: URL): string | null { return decodeURIComponent(encodedTagName) .replaceAll(/-[a-z]+-/gi, match => slugEncodedCharacters.get(match) ?? match) - .replaceAll('-', ' ') .replaceAll('+', ' '); } From 736c0917c03a863ea6bfa2b4f98560e4460d21e1 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:01:14 +0400 Subject: [PATCH 03/19] =?UTF-8?q?Reversed=20`+`=20=E2=86=92=20`=20`=20conv?= =?UTF-8?q?ersion=20with=20dash-encoded=20characters=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way encoded `+` character will properly decode after other `+` were dealt with --- src/lib/philomena/tag-utils.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/philomena/tag-utils.ts b/src/lib/philomena/tag-utils.ts index 0035c2f..91b5464 100644 --- a/src/lib/philomena/tag-utils.ts +++ b/src/lib/philomena/tag-utils.ts @@ -101,8 +101,8 @@ export function resolveTagNameFromLink(tagLink: URL): string | null { } return decodeURIComponent(encodedTagName) - .replaceAll(/-[a-z]+-/gi, match => slugEncodedCharacters.get(match) ?? match) - .replaceAll('+', ' '); + .replaceAll('+', ' ') + .replaceAll(/-[a-z]+-/gi, match => slugEncodedCharacters.get(match) ?? match); } /** From 24d17416b53539eedfee710d99bc9c2525e4dd45 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:07:11 +0400 Subject: [PATCH 04/19] Cover the case when first term is not a term or when nothing is there --- tests/lib/philomena/tag-utils.spec.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/lib/philomena/tag-utils.spec.ts b/tests/lib/philomena/tag-utils.spec.ts index c95d56e..6ae1198 100644 --- a/tests/lib/philomena/tag-utils.spec.ts +++ b/tests/lib/philomena/tag-utils.spec.ts @@ -11,7 +11,6 @@ describe('tag-utils', () => { } describe('Parsing from /search/?q=tag links', () => { - // Test cases for tags separated by commas it('should resolve a single tag from /search URLs', () => { expect(resolveFromSearchQuery('safe')).toBe('safe'); }); @@ -21,6 +20,11 @@ describe('tag-utils', () => { expect(resolveFromSearchQuery('safe, suggestive')).toBe(null); }); + it('should return null if query is empty or not a term', () => { + expect(resolveFromSearchQuery('')).toBe(null); + expect(resolveFromSearchQuery('!')).toBe(null); + }); + it('should properly treat parentheses in the query with single tag', () => { // Parentheses are operators in the query language, but when inside the tag name, they should still be properly // working. From 3e0495a529284ffc50461faf5bfbd46c10ea1e8d Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:42:57 +0400 Subject: [PATCH 05/19] Moved parentheses check to run before dirty text content extraction --- src/lib/philomena/search/QueryLexer.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 76d2d5f..57ac1b9 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -207,12 +207,6 @@ export class QueryLexer { break; } - if (this.#matchAt(QueryLexer.#dirtyTextContent, index, result)) { - resultValue += result.match![0]; - index += result.match![0].length; - continue; - } - if (this.#value[index] === QueryLexer.#bracketsOpenCharacter) { let bracketsContent = QueryLexer.#bracketsOpenCharacter + this.#parseDirtyText(index + 1); @@ -227,6 +221,12 @@ export class QueryLexer { continue; } + if (this.#matchAt(QueryLexer.#dirtyTextContent, index, result)) { + resultValue += result.match![0]; + index += result.match![0].length; + continue; + } + break; } From 74f113412e5e617e32a3866f0ea02d960fefdac9 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:44:57 +0400 Subject: [PATCH 06/19] Fixed space being mandatory before `)` and `^` --- src/lib/philomena/search/QueryLexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 57ac1b9..c300085 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -243,6 +243,6 @@ export class QueryLexer { static #boostOperator = /\^[+-]?\d+(?:\.\d+)?/y; static #whitespaces = /\s+/y; static #quotedText = /"((?:\\.|[^\\"])+)"/y; - static #dirtyTextStopWords = /,|\s+(?:AND|&&|OR|\|\|)\s+|\s+(?:\)|\^[+-]?\d+(?:\.\d+)?)/y; + static #dirtyTextStopWords = /,|\s+(?:AND|&&|OR|\|\|)\s+|\s*(?:\)|\^[+-]?\d+(?:\.\d+)?)/y; static #dirtyTextContent = /\\.|[^()]/y; } From de5743259423cc7f44aaa23bee331fce1908baa6 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:46:08 +0400 Subject: [PATCH 07/19] Fixed wrong index used when checking for `)` character --- src/lib/philomena/search/QueryLexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index c300085..bfc0edf 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -210,7 +210,7 @@ export class QueryLexer { if (this.#value[index] === QueryLexer.#bracketsOpenCharacter) { let bracketsContent = QueryLexer.#bracketsOpenCharacter + this.#parseDirtyText(index + 1); - if (this.#value[index + bracketsContent.length + 1] === QueryLexer.#bracketsCloseCharacter) { + if (this.#value[index + bracketsContent.length] === QueryLexer.#bracketsCloseCharacter) { bracketsContent += QueryLexer.#bracketsCloseCharacter; } From 88ebcef18a2afc7d35196f9268d3c3de1c6fc034 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sat, 13 Jun 2026 22:57:37 +0400 Subject: [PATCH 08/19] Removed imports of already global functions --- tests/lib/philomena/tag-utils.spec.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lib/philomena/tag-utils.spec.ts b/tests/lib/philomena/tag-utils.spec.ts index 6ae1198..a322a07 100644 --- a/tests/lib/philomena/tag-utils.spec.ts +++ b/tests/lib/philomena/tag-utils.spec.ts @@ -1,4 +1,3 @@ -import { describe, expect, it } from 'vitest'; import { URL } from 'url'; import { resolveTagNameFromLink, slugEncodedCharacters } from '$lib/philomena/tag-utils'; From 8c4c32c4bfa6ed0f899bdfe8a09fa7361a003a4d Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 00:29:33 +0400 Subject: [PATCH 09/19] Covering quoted term decoding/encoding behavior --- tests/lib/philomena/search/QueryLexer.spec.ts | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/lib/philomena/search/QueryLexer.spec.ts diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts new file mode 100644 index 0000000..1c7e9d3 --- /dev/null +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -0,0 +1,31 @@ +import { QueryLexer, QuotedTermToken, Token } from "$lib/philomena/search/QueryLexer"; + +describe('QueryLexer', () => { + function parseQuery(query: string): Token[] { + return new QueryLexer(query).parse(); + } + + describe('QuotedTermToken', () => { + it('should decode and encode quotes and backslash', () => { + const encodedQuote = `"term with \\\" inside of it"`; + const decodedQuote = 'term with " inside of it'; + + expect(QuotedTermToken.decode(encodedQuote)).toBe(decodedQuote); + expect(QuotedTermToken.encode(decodedQuote)).toBe(encodedQuote); + + const encodedBackslash = `"term with \\\\ inside of it"`; + const decodedBackslash = 'term with \\ inside of it'; + + expect(QuotedTermToken.decode(encodedBackslash)).toBe(decodedBackslash); + expect(QuotedTermToken.encode(decodedBackslash)).toBe(encodedBackslash); + }); + + it('should not care for anything else', () => { + const encodedTerm = '"operators: , && || AND OR NOT ! ^ ? *"'; + const decodedTerm = 'operators: , && || AND OR NOT ! ^ ? *'; + + expect(QuotedTermToken.decode(encodedTerm)).toBe(decodedTerm); + expect(QuotedTermToken.encode(decodedTerm)).toBe(encodedTerm); + }); + }); +}); From 40aa02ff701d312b67dbe2bbbffa99b024da175c Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 00:32:47 +0400 Subject: [PATCH 10/19] Fixed quoted term not processing starting & ending quotes --- src/lib/philomena/search/QueryLexer.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index bfc0edf..5099cf6 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -41,11 +41,13 @@ export class QuotedTermToken extends Token { } static decode(value: string): string { - return value.replace(/\\([\\"])/g, "$1"); + return value + .replaceAll(/\\([\\"])/g, "$1") + .replaceAll(/^"|"$/g, ''); } static encode(value: string): string { - return value.replace(/[\\"]/g, "\\$&"); + return `"${value.replaceAll(/[\\"]/g, "\\$&")}"`; } } From 6ceeabe17089249ccbe5c5c8f0cab31ff91c241d Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 01:28:37 +0400 Subject: [PATCH 11/19] Added tests for search query lexer --- tests/lib/philomena/search/QueryLexer.spec.ts | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts index 1c7e9d3..634c0de 100644 --- a/tests/lib/philomena/search/QueryLexer.spec.ts +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -1,10 +1,55 @@ -import { QueryLexer, QuotedTermToken, Token } from "$lib/philomena/search/QueryLexer"; +import { + AndToken, GroupEndToken, GroupStartToken, + NotToken, + OrToken, + QueryLexer, + QuotedTermToken, + TermToken, + Token +} from "$lib/philomena/search/QueryLexer"; describe('QueryLexer', () => { function parseQuery(query: string): Token[] { return new QueryLexer(query).parse(); } + function parseQueryTypes(query: string): (typeof Token)[] { + return parseQuery(query) + .map(term => (term.constructor as any) as typeof Token); + } + + it('should properly parse different kinds of queries', () => { + expect(parseQueryTypes('safe')).toEqual([TermToken]); + expect(parseQueryTypes('safe, avali')).toEqual([TermToken, AndToken, TermToken]); + expect(parseQueryTypes('!avali')).toEqual([NotToken, TermToken]); + expect(parseQueryTypes('avali || 4 ears')).toEqual([TermToken, OrToken, TermToken]); + expect(parseQueryTypes('avali && !4 ears')).toEqual([TermToken, AndToken, NotToken, TermToken]); + + expect(parseQueryTypes('avali AND (!4 ears OR -3 fingers)')).toEqual([ + TermToken, AndToken, GroupStartToken, NotToken, TermToken, OrToken, NotToken, TermToken, GroupEndToken, + ]); + }); + + it('should not treat parentheses as groups inside the term', () => { + expect(parseQueryTypes('!(experiment (casualties unknown) || milky (casualties unknown))')).toEqual([ + NotToken, GroupStartToken, TermToken, OrToken, TermToken, GroupEndToken, + ]); + }); + + it('should accept any amount of whitespaces between different tokens', () => { + expect(parseQueryTypes('! ( avali , experiment (casualties unknown) ) && safe')).toEqual([ + NotToken, GroupStartToken, TermToken, AndToken, TermToken, GroupEndToken, AndToken, TermToken, + ]); + }); + + it('should trim whitespaces inside the terms, even in quoted ones', () => { + const [termWithSpaces] = parseQuery(' avali '); + expect(termWithSpaces.value).toBe('avali'); + + const [quotedTermWithSpaces] = parseQuery(' " avali " '); + expect(quotedTermWithSpaces instanceof QuotedTermToken && quotedTermWithSpaces.decodedValue || new Error('Wrong token')).toBe('avali'); + }); + describe('QuotedTermToken', () => { it('should decode and encode quotes and backslash', () => { const encodedQuote = `"term with \\\" inside of it"`; From a90918079833f6f89433f1dd9acbba5d7fb5ecba Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 01:29:17 +0400 Subject: [PATCH 12/19] Fixed terms not trimming out the whitespaces --- src/lib/philomena/search/QueryLexer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 5099cf6..1eba2f5 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -151,7 +151,7 @@ export class QueryLexer { dirtyText = this.#parseDirtyText(this.#index); if (dirtyText) { - tokens.push(new TermToken(this.#index, dirtyText)); + tokens.push(new TermToken(this.#index, dirtyText.trim())); this.#index += dirtyText.length; continue; } @@ -244,7 +244,7 @@ export class QueryLexer { static #bracketsCloseCharacter = ")"; static #boostOperator = /\^[+-]?\d+(?:\.\d+)?/y; static #whitespaces = /\s+/y; - static #quotedText = /"((?:\\.|[^\\"])+)"/y; + static #quotedText = /"\s*((?:\\.|[^\\"])+?)\s*"/y; static #dirtyTextStopWords = /,|\s+(?:AND|&&|OR|\|\|)\s+|\s*(?:\)|\^[+-]?\d+(?:\.\d+)?)/y; static #dirtyTextContent = /\\.|[^()]/y; } From 7c462e1b5ccdc9914c5edfa7806118c30372cc6b Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 01:31:10 +0400 Subject: [PATCH 13/19] Added missing coverage for "NOT" operator --- tests/lib/philomena/search/QueryLexer.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts index 634c0de..36a6ce8 100644 --- a/tests/lib/philomena/search/QueryLexer.spec.ts +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -25,7 +25,7 @@ describe('QueryLexer', () => { expect(parseQueryTypes('avali || 4 ears')).toEqual([TermToken, OrToken, TermToken]); expect(parseQueryTypes('avali && !4 ears')).toEqual([TermToken, AndToken, NotToken, TermToken]); - expect(parseQueryTypes('avali AND (!4 ears OR -3 fingers)')).toEqual([ + expect(parseQueryTypes('avali AND (NOT 4 ears OR -3 fingers)')).toEqual([ TermToken, AndToken, GroupStartToken, NotToken, TermToken, OrToken, NotToken, TermToken, GroupEndToken, ]); }); From f8758306b78bd7679a110878cbd445f2fd988f2c Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 02:03:08 +0400 Subject: [PATCH 14/19] Added boost to the list of queries to test --- tests/lib/philomena/search/QueryLexer.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts index 36a6ce8..416638c 100644 --- a/tests/lib/philomena/search/QueryLexer.spec.ts +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -20,6 +20,7 @@ describe('QueryLexer', () => { it('should properly parse different kinds of queries', () => { expect(parseQueryTypes('safe')).toEqual([TermToken]); + expect(parseQueryTypes('safe^1')).toEqual([TermToken, BoostToken]); expect(parseQueryTypes('safe, avali')).toEqual([TermToken, AndToken, TermToken]); expect(parseQueryTypes('!avali')).toEqual([NotToken, TermToken]); expect(parseQueryTypes('avali || 4 ears')).toEqual([TermToken, OrToken, TermToken]); From ab255e535ca04ac65b0817358fc1731266f0ad94 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 02:03:41 +0400 Subject: [PATCH 15/19] Testing word-like operators and escaping them inside quotes --- tests/lib/philomena/search/QueryLexer.spec.ts | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts index 416638c..7f609cf 100644 --- a/tests/lib/philomena/search/QueryLexer.spec.ts +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -51,6 +51,37 @@ describe('QueryLexer', () => { expect(quotedTermWithSpaces instanceof QuotedTermToken && quotedTermWithSpaces.decodedValue || new Error('Wrong token')).toBe('avali'); }); + it('should properly differentiate between word-like operators and parts of tags', () => { + expect(parseQueryTypes('safe AND sound')).toEqual([TermToken, AndToken, TermToken]); + expect(parseQueryTypes('NOT safe AND dangerous')).toEqual([NotToken, TermToken, AndToken, TermToken]); + }); + + it('should only detect word-like operators when spaces are in place', () => { + // Require whitespace between operator and other tokens + expect(parseQueryTypes('NOT safeANDsound')).toEqual([NotToken, TermToken]); + + // If none are there, just should treat it as a part of a term + expect(parseQuery('safeAND sound')[0].value).toEqual('safeAND sound'); + + // All operators should be in all caps, otherwise it's just a term + const [lowercaseOperatorWords] = parseQuery('avali are cute and you know it or else'); + expect(lowercaseOperatorWords.value).toBe('avali are cute and you know it or else'); + + // And if it in caps, but part of some word, then it's just a word + const [wordsInCapsContainingOperators] = parseQuery('THAT POOR KNOT IS PLAIN AS SAND'); + expect(wordsInCapsContainingOperators.value).toBe('THAT POOR KNOT IS PLAIN AS SAND'); + }); + + it('should not treat any operators inside the quoted term as actual operators', () => { + const tokens = parseQuery('"this AND that OR these NOT there || () && ^123"'); + const [quotedTermToken] = tokens; + + expect(tokens).toHaveLength(1); + + expect(quotedTermToken instanceof QuotedTermToken && quotedTermToken.decodedValue || null) + .toBe('this AND that OR these NOT there || () && ^123'); + }); + describe('QuotedTermToken', () => { it('should decode and encode quotes and backslash', () => { const encodedQuote = `"term with \\\" inside of it"`; From 3a31eb2519a43b4a349231ba6ec45e61916dfd73 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 02:05:00 +0400 Subject: [PATCH 16/19] Formatting, missing import --- tests/lib/philomena/search/QueryLexer.spec.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/lib/philomena/search/QueryLexer.spec.ts b/tests/lib/philomena/search/QueryLexer.spec.ts index 7f609cf..125708a 100644 --- a/tests/lib/philomena/search/QueryLexer.spec.ts +++ b/tests/lib/philomena/search/QueryLexer.spec.ts @@ -1,5 +1,8 @@ import { - AndToken, GroupEndToken, GroupStartToken, + AndToken, + BoostToken, + GroupEndToken, + GroupStartToken, NotToken, OrToken, QueryLexer, From 9a6274c81594b32e7e4feccfa6d10b336e8c6bee Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 03:13:45 +0400 Subject: [PATCH 17/19] Sonar: Marked special characters and RegExps as readonly --- src/lib/philomena/search/QueryLexer.ts | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 1eba2f5..40adeb0 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -235,16 +235,16 @@ export class QueryLexer { return resultValue; } - static #commaCharacter = ','; - static #negotiationOperator = /[!-]/y; - static #andOperator = /\s+(?:AND|&&)\s+/y; - static #orOperator = /\s+(?:OR|\|\|)\s+/y; - static #notOperator = /NOT\s+/y; - static #bracketsOpenCharacter = "("; - static #bracketsCloseCharacter = ")"; - static #boostOperator = /\^[+-]?\d+(?:\.\d+)?/y; - static #whitespaces = /\s+/y; - static #quotedText = /"\s*((?:\\.|[^\\"])+?)\s*"/y; - static #dirtyTextStopWords = /,|\s+(?:AND|&&|OR|\|\|)\s+|\s*(?:\)|\^[+-]?\d+(?:\.\d+)?)/y; - static #dirtyTextContent = /\\.|[^()]/y; + static readonly #commaCharacter = ','; + static readonly #negotiationOperator = /[!-]/y; + static readonly #andOperator = /\s+(?:AND|&&)\s+/y; + static readonly #orOperator = /\s+(?:OR|\|\|)\s+/y; + static readonly #notOperator = /NOT\s+/y; + static readonly #bracketsOpenCharacter = "("; + static readonly #bracketsCloseCharacter = ")"; + static readonly #boostOperator = /\^[+-]?\d+(?:\.\d+)?/y; + static readonly #whitespaces = /\s+/y; + static readonly #quotedText = /"\s*((?:\\.|[^\\"])+?)\s*"/y; + static readonly #dirtyTextStopWords = /,|\s+(?:AND|&&|OR|\|\|)\s+|\s*(?:\)|\^[+-]?\d+(?:\.\d+)?)/y; + static readonly #dirtyTextContent = /\\.|[^()]/y; } From ed3db1240cde529fd6e360d1dbf4175671e9462f Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 03:15:51 +0400 Subject: [PATCH 18/19] Signaling to TypeScript that match is present when function returns true --- src/lib/philomena/search/QueryLexer.ts | 40 ++++++++++++++------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 40adeb0..2848aa1 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -54,10 +54,14 @@ export class QuotedTermToken extends Token { export class TermToken extends Token { } -type MatchResultCarry = { +interface MatchResultCarry { match?: RegExpMatchArray | null } +interface SuccessfulMatchResultCarry { + match: RegExpMatchArray; +} + /** * Search query tokenizer. Should mostly work for the cases of parsing and finding the selected term for * auto-completion. Follows the rules described in the Philomena booru engine. @@ -96,26 +100,26 @@ export class QueryLexer { } if (this.#match(QueryLexer.#negotiationOperator, result)) { - tokens.push(new NotToken(this.#index, result.match![0])); - this.#index += result.match![0].length; + tokens.push(new NotToken(this.#index, result.match[0])); + this.#index += result.match[0].length; continue; } if (this.#match(QueryLexer.#andOperator, result)) { - tokens.push(new AndToken(this.#index, result.match![0])); - this.#index += result.match![0].length; + tokens.push(new AndToken(this.#index, result.match[0])); + this.#index += result.match[0].length; continue; } if (this.#match(QueryLexer.#orOperator, result)) { - tokens.push(new OrToken(this.#index, result.match![0])); - this.#index += result.match![0].length; + tokens.push(new OrToken(this.#index, result.match[0])); + this.#index += result.match[0].length; continue; } if (this.#match(QueryLexer.#notOperator, result)) { - tokens.push(new NotToken(this.#index, result.match![0])); - this.#index += result.match![0].length; + tokens.push(new NotToken(this.#index, result.match[0])); + this.#index += result.match[0].length; continue; } @@ -132,19 +136,19 @@ export class QueryLexer { } if (this.#match(QueryLexer.#boostOperator, result)) { - tokens.push(new BoostToken(this.#index, result.match![0])); - this.#index += result.match![0].length; + tokens.push(new BoostToken(this.#index, result.match[0])); + this.#index += result.match[0].length; continue; } if (this.#match(QueryLexer.#whitespaces, result)) { - this.#index += result.match![0].length; + this.#index += result.match[0].length; continue; } if (this.#match(QueryLexer.#quotedText, result)) { - tokens.push(new QuotedTermToken(this.#index, result.match![0], result.match![1])); - this.#index += result.match![0].length; + tokens.push(new QuotedTermToken(this.#index, result.match[0], result.match[1])); + this.#index += result.match[0].length; continue; } @@ -170,7 +174,7 @@ export class QueryLexer { * * @return Is there a match? */ - #match(targetRegExp: RegExp, resultCarrier: MatchResultCarry = {}): boolean { + #match(targetRegExp: RegExp, resultCarrier: MatchResultCarry = {}): resultCarrier is SuccessfulMatchResultCarry { return this.#matchAt(targetRegExp, this.#index, resultCarrier); } @@ -183,7 +187,7 @@ export class QueryLexer { * * @return Is there a match? */ - #matchAt(targetRegExp: RegExp, index: number, resultCarrier: MatchResultCarry = {}): boolean { + #matchAt(targetRegExp: RegExp, index: number, resultCarrier: MatchResultCarry = {}): resultCarrier is SuccessfulMatchResultCarry { targetRegExp.lastIndex = index; resultCarrier.match = this.#value.match(targetRegExp); @@ -224,8 +228,8 @@ export class QueryLexer { } if (this.#matchAt(QueryLexer.#dirtyTextContent, index, result)) { - resultValue += result.match![0]; - index += result.match![0].length; + resultValue += result.match[0]; + index += result.match[0].length; continue; } From 3404877091d574e756579d979f722d5f86c961d7 Mon Sep 17 00:00:00 2001 From: KoloMl Date: Sun, 14 Jun 2026 03:19:54 +0400 Subject: [PATCH 19/19] Sonar: Replacing `string.match(regexp)` with `regexp.exec(string)` --- src/lib/philomena/search/QueryLexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/philomena/search/QueryLexer.ts b/src/lib/philomena/search/QueryLexer.ts index 2848aa1..b3fa598 100644 --- a/src/lib/philomena/search/QueryLexer.ts +++ b/src/lib/philomena/search/QueryLexer.ts @@ -189,7 +189,7 @@ export class QueryLexer { */ #matchAt(targetRegExp: RegExp, index: number, resultCarrier: MatchResultCarry = {}): resultCarrier is SuccessfulMatchResultCarry { targetRegExp.lastIndex = index; - resultCarrier.match = this.#value.match(targetRegExp); + resultCarrier.match = targetRegExp.exec(this.#value); return resultCarrier.match !== null; }