Appearance
Querying
How is Aeppic.find(q)
processed ?
The actual processing of the queries depends somewhat on the backend used. Let's go through the case of the default in-memory datamodel in a slightly simplified fashion.
- The query gets converted into a
QueryMatcher
- The query gets converted into an iterator which can be used to efficiently iterate all known documents using the available indexes
The matching function needs to be efficient of course, so it compiles the graph into an optimized testing function. Subsequent identical queries can re-use the same function (as long as only the parameters are different) which is likely to be optimized into native code by the javascript engine.
Even though the QueryMatcher is quite fast, it is best not to call it on documents that cannot possibly match anyway. Thus indexing is used to iterate only over a superset of documents that need to be tested.
The Iterator
is looking through the graph and calculates which Index to iterate in which order to avoid iterating over all documents.
In order to do that, it takes each subpart in the query e.g. f.id:X AND data.age:8
and checks whether those fields are covered by any index. If one field is covered, the Index is asked to provide an iterator over that subset of documents. In case multiple terms are covered they are intersected (in the case of AND
) with the smallest sets iterated first.
The end result is a enumeration of documents defining a possible superset of the requested data which then get tested using the QueryMatcher.
At the moment sort is done at the end as the iterators do not support sorted indexes
Potential Optimizations
INFO
There are a number of obvious optimizations possible. Such as using the knowledge of the schema and the data fields matched to limit searching. But none of those are implemented at the moment. The long-term goal is to move the data organization into a dedicated database such as sqlite or equivalent.
String Queries
Queries made using string queries using a variant of the lucene query syntax get parsed into a simple graph by a Parser
The parsing first uses and underlying raw PEG parser which emits a strict left/right syntax tree which then gets refined in order to join adjacent nodes into a more efficient structure (i.e. joining compatible adjacents terms into one condition) identical to what the fluent syntax would emit.
Some examples can be found in the tests and source
Example Graphs
Simple Query
json
{
graph: {
operator: 'AND',
conditions: [
{
field: 'p',
fieldPrefix: null,
term: { id: 0 },
termQuotes: 'none',
similarity: null,
boost: null,
termPrefix: null
},
{
field: 'f.id',
fieldPrefix: null,
term: { id: 1 },
termQuotes: 'none',
similarity: null,
boost: null,
termPrefix: null
},
{
field: 'data.name',
fieldPrefix: null,
term: { id: 2 },
termQuotes: 'none',
similarity: null,
boost: null,
termPrefix: null
}
]
},
parameters: { '0': 'my-root', '1': 'folder', '2': 'a*' },
...
}
Code
Fluent Query Builder
query-fluent.spec.js and fluent.ts
js
import test from 'ava'
import { Scopes, isQueryBuilder, ScopedQueryBuilder, toQuery } from '../../src/model/query/fluent.js'
import { isQuery } from '../../src/model/query/types.js'
/**
* @feature Query#Scopes
*/
test('Build a simple child query', t => {
const query = createRootChildrenScope().toQuery()
t.is(query.graph.conditions.length, 1)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
})
test('Requires a scope by default in the Builder', t => {
t.throws(() => new ScopedQueryBuilder())
})
test('Cannot alter finalized queries', t => {
const builder = createRootChildrenScope()
builder.toQuery()
t.throws(() => builder.where('a').is(3))
})
test('Can test for builders', t => {
t.false(isQueryBuilder())
t.false(isQueryBuilder(createRootChildrenScope().toQuery()))
t.true(isQueryBuilder(createRootChildrenScope()))
})
test('Can test for queries', t => {
t.false(isQuery())
t.true(isQuery(createRootChildrenScope().toQuery()))
})
test('Build a simple child query with a filter on forms', t => {
const query = createRootChildrenScope().form('folder-form').toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'f.id',
term: 'folder-form',
})
})
test('Build a simple child query with a filter on many forms', t => {
const query = createRootChildrenScope().forms(['folder-form', 'some-other-form']).toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'f.id',
term: ['folder-form', 'some-other-form'],
})
})
test('Build a simple child query with a generic matching term', t => {
const query = createRootChildrenScope().where('a').is('SOMETHING').toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'a',
term: 'SOMETHING',
})
})
test('Build a simple child query with some arbitrary OR matching term', t => {
const query = createRootChildrenScope().where('someField').isOneOf(['SOMETHING', 'SOMETHING_ELSE']).toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
})
test('Build a simple child query with some arbitrary range matching term', t => {
const query = createRootChildrenScope().where('someField').isInRange(0, 4).toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '0',
/* eslint-disable-next-line camelcase */
term_max: '4',
})
})
test('Build a simple child query with some open range matching term', t => {
const query = createRootChildrenScope().where('someField').isInRange(null, 4).toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '*',
/* eslint-disable-next-line camelcase */
term_max: '4',
})
})
test('Build a simple child query with some open range end matching term', t => {
const query = createRootChildrenScope().where('someField').isInRange(3).toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '3',
/* eslint-disable-next-line camelcase */
term_max: '*',
})
})
test('Build a simple child query with some date range matching term', t => {
const query = createRootChildrenScope().where('someField').isInRange('2024-06', '*').toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '2024-06',
/* eslint-disable-next-line camelcase */
term_max: '*',
})
})
test('Build a simple child query with a full open range matching term', t => {
const query = createRootChildrenScope().where('someField').isTruthy().toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '*',
/* eslint-disable-next-line camelcase */
term_max: '*',
})
})
test('Build a simple children query without an ancestor', t => {
t.throws(() => Scopes.children().where('someField').isTruthy().toQuery())
})
test('Convert a builder and a query to a query', t => {
const convertedABuilder = toQuery(Scopes.global().form('form'))
const alreadyAQuery = toQuery(Scopes.global().form('form').toQuery())
t.true(isQuery(convertedABuilder))
t.true(isQuery(alreadyAQuery))
})
test('Build a simple descendant query with a full open range matching term', t => {
const query = Scopes.descendants('ancestor').where('someField').isTruthy().toQuery()
t.is(query.graph.operator, 'AND')
t.is(query.graph.conditions.length, 2)
validateCondition(t, query.graph.conditions[0], query.parameters, {
field: 'a',
term: 'ancestor',
})
validateCondition(t, query.graph.conditions[1], query.parameters, {
field: 'someField',
/* eslint-disable-next-line camelcase */
term_min: '*',
/* eslint-disable-next-line camelcase */
term_max: '*',
})
})
test('Build a simple descendant query without an ancestor', t => {
t.throws(() => Scopes.descendants().where('someField').isTruthy().toQuery())
})
test('Build a query with a simple top-level OR', t => {
const { graph, parameters } = Scopes.global()
.where('someField').isOneOf(['SOMETHING', 'SOMETHING_ELSE'])
.or
.where('someOtherField').is('test')
.where('yetSomeOtherField').is('also-test')
.toQuery()
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 2)
validateCondition(t, graph.conditions[0], parameters, {
field: 'someField',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
t.is(graph.conditions[1].operator, 'AND')
validateCondition(t, graph.conditions[1].conditions[0], parameters, {
field: 'someOtherField',
term: 'test',
})
validateCondition(t, graph.conditions[1].conditions[1], parameters, {
field: 'yetSomeOtherField',
term: 'also-test',
})
})
test('Build a query with a few matches', t => {
const { graph, parameters } = Scopes.global()
.where('someField').is('text')
.where('someOtherField').is(2)
.where('yetSomeOtherField').is(false)
.toQuery()
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
validateCondition(t, graph.conditions[0], parameters, {
field: 'someField',
term: 'text',
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'someOtherField',
term: 2,
})
validateCondition(t, graph.conditions[2], parameters, {
field: 'yetSomeOtherField',
term: false,
})
})
test('Build a query with a more AND/OR joins', t => {
const { graph, parameters } = Scopes.global()
.where('someFieldA').isOneOf(['SOMETHING', 'SOMETHING_ELSE'])
.where('someFieldB').is(3)
.or
.where('someOtherField').is('test')
.where('someFieldB').is(8)
.where('someFieldC').isOneOf([10, 11, 20])
.toQuery()
console.dir(graph, { colors: true, depth: null })
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 2)
t.is(graph.conditions[0].operator, 'AND')
validateCondition(t, graph.conditions[0].conditions[0], parameters, {
field: 'someFieldA',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
validateCondition(t, graph.conditions[0].conditions[1], parameters, {
field: 'someFieldB',
term: 3,
})
t.is(graph.conditions[1].operator, 'AND')
validateCondition(t, graph.conditions[1].conditions[0], parameters, {
field: 'someOtherField',
term: 'test',
})
validateCondition(t, graph.conditions[1].conditions[1], parameters, {
field: 'someFieldB',
term: 8,
})
validateCondition(t, graph.conditions[1].conditions[2], parameters, {
field: 'someFieldC',
term: [10, 11, 20],
})
})
test('Build a query with more than 2 OR joins', t => {
const { graph, parameters } = Scopes.global()
.where('someFieldA').isOneOf(['SOMETHING', 'SOMETHING_ELSE'])
.where('someFieldB').is(3)
.or
.where('someOtherField').is('test')
.where('someFieldB').is('8') // as string
.or
.where('someFieldC').isOneOf([10, 11, 20])
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 3)
t.is(graph.conditions[0].operator, 'AND')
validateCondition(t, graph.conditions[0].conditions[0], parameters, {
field: 'someFieldA',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
validateCondition(t, graph.conditions[0].conditions[1], parameters, {
field: 'someFieldB',
term: 3,
})
t.is(graph.conditions[1].operator, 'AND')
validateCondition(t, graph.conditions[1].conditions[0], parameters, {
field: 'someOtherField',
term: 'test',
})
validateCondition(t, graph.conditions[1].conditions[1], parameters, {
field: 'someFieldB',
term: '8',
})
validateCondition(t, graph.conditions[2], parameters, {
field: 'someFieldC',
term: [10, 11, 20],
})
})
test('Build a query with more than 2 OR joins but only a single field match between the ORs', t => {
const { graph, parameters } = Scopes.global()
.where('someFieldA').isOneOf(['SOMETHING', 'SOMETHING_ELSE'])
.or
.where('someOtherField').is('test')
.or
.where('someFieldC').isOneOf([10, 11, 20])
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 3)
validateCondition(t, graph.conditions[0], parameters, {
field: 'someFieldA',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'someOtherField',
term: 'test',
})
validateCondition(t, graph.conditions[2], parameters, {
field: 'someFieldC',
term: [10, 11, 20],
})
})
test('Build a query with more than 2 OR joins (with multi-term AND joins in-between)', t => {
const { graph, parameters } = Scopes.global()
.where('someFieldA').isOneOf(['SOMETHING', 'SOMETHING_ELSE'])
.where('someFieldB').is(3)
.or
.where('someOtherField').is('test')
.where('someFieldB').is(8)
.where('someFieldC').is(50)
.or
.where('someFieldB').is(50)
.where('someFieldC').isOneOf([10, 11, 20])
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 3)
t.is(graph.conditions[0].operator, 'AND')
validateCondition(t, graph.conditions[0].conditions[0], parameters, {
field: 'someFieldA',
term: ['SOMETHING', 'SOMETHING_ELSE'],
})
validateCondition(t, graph.conditions[0].conditions[1], parameters, {
field: 'someFieldB',
term: 3,
})
t.is(graph.conditions[1].operator, 'AND')
validateCondition(t, graph.conditions[1].conditions[0], parameters, {
field: 'someOtherField',
term: 'test',
})
validateCondition(t, graph.conditions[1].conditions[1], parameters, {
field: 'someFieldB',
term: 8,
})
validateCondition(t, graph.conditions[1].conditions[2], parameters, {
field: 'someFieldC',
term: 50,
})
t.is(graph.conditions[2].operator, 'AND')
validateCondition(t, graph.conditions[2].conditions[0], parameters, {
field: 'someFieldB',
term: 50,
})
validateCondition(t, graph.conditions[2].conditions[1], parameters, {
field: 'someFieldC',
term: [10, 11, 20],
})
})
test('Build a query with a subquery', t => {
const { graph, parameters } = createRootChildrenScope()
.where('name').is('sub-query')
.subQuery(q => (
q.where('someFieldB').is(5)
.where('someFieldC').is(6)
.or
.where('someFieldB').is(10)
))
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
validateCondition(t, graph.conditions[0], parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'name',
term: 'sub-query',
})
t.is(graph.conditions[2].operator, 'OR')
t.is(graph.conditions[2].conditions[0].operator, 'AND')
validateCondition(t, graph.conditions[2].conditions[0].conditions[0], parameters, {
field: 'someFieldB',
term: 5,
})
validateCondition(t, graph.conditions[2].conditions[0].conditions[1], parameters, {
field: 'someFieldC',
term: 6,
})
validateCondition(t, graph.conditions[2].conditions[1], parameters, {
field: 'someFieldB',
term: 10,
})
})
test('Build a query with an empty subquery', t => {
const { graph, parameters } = createRootChildrenScope()
.where('name').is('sub-query')
.subQuery(q => q)
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 2)
validateCondition(t, graph.conditions[0], parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'name',
term: 'sub-query',
})
})
test('Build a query with a subquery combined with an OR', t => {
const { graph, parameters } = createRootChildrenScope()
.where('name').is('sub-query')
.or
.subQuery(q => (
q.where('someFieldB').is(5)
.where('someFieldC').is(6)
.or
.where('someFieldB').is(10)
))
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 2)
validateCondition(t, graph.conditions[0], parameters, {
field: 'p',
term: 'root',
})
t.is(graph.conditions[1].operator, 'OR')
t.is(graph.conditions[1].conditions.length, 2)
validateCondition(t, graph.conditions[1].conditions[0], parameters, {
field: 'name',
term: 'sub-query',
})
t.is(graph.conditions[1].conditions[1].operator, 'OR')
t.is(graph.conditions[1].conditions[1].conditions[0].operator, 'AND')
validateCondition(t, graph.conditions[1].conditions[1].conditions[0].conditions[0], parameters, {
field: 'someFieldB',
term: 5,
})
validateCondition(t, graph.conditions[1].conditions[1].conditions[0].conditions[1], parameters, {
field: 'someFieldC',
term: 6,
})
validateCondition(t, graph.conditions[1].conditions[1].conditions[1], parameters, {
field: 'someFieldB',
term: 10,
})
})
test('Build a query with a subquery combined with an AND', t => {
const { graph, parameters } = createRootChildrenScope()
.where('name').is('sub-query')
.subQuery(q => (
q.where('someFieldB').is(5)
))
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
validateCondition(t, graph.conditions[0], parameters, {
field: 'p',
term: 'root',
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'name',
term: 'sub-query',
})
validateCondition(t, graph.conditions[2], parameters, {
field: 'someFieldB',
term: 5,
})
})
test('Build a query ANDS', t => {
const { graph, parameters } = Scopes.global()
.where('nameA').is('a')
.where('nameB').is('b')
.where('nameC').is('c')
.toQuery()
console.dir(graph, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
validateCondition(t, graph.conditions[0], parameters, {
field: 'nameA',
term: 'a',
})
validateCondition(t, graph.conditions[1], parameters, {
field: 'nameB',
term: 'b',
})
validateCondition(t, graph.conditions[2], parameters, {
field: 'nameC',
term: 'c',
})
})
function createRootChildrenScope() {
return Scopes.children('root')
}
function validateCondition(t, condition, parameters, comparisons) {
for (const [fieldName, value] of Object.entries(comparisons)) {
const fieldInCondition = condition[fieldName]
// console.log(fieldName, value, condition, fieldInCondition)
if (typeof fieldInCondition === 'object' && 'id' in fieldInCondition) {
const fieldValue = parameters[fieldInCondition.id]
t.deepEqual(fieldValue, value)
} else {
t.deepEqual(fieldInCondition, value)
}
}
return true
}
ts
// export class QueryBuilder {
import * as Types from '@aeppic/types'
import getNow from '@aeppic/shared/now'
import * as QueryTypes from './types.js'
import { isReference } from 'model/is.js'
export type ScopedQueryMatchOptions = QueryTypes.MatchOptions & {
allowUnscopedQuery?: boolean
}
export interface SubQueryDefinitionFunction {
(subQueryBuilder: QueryBuilder): void
}
type QueryScope = {
field: string
value: string
}
export function isQueryBuilder(builder: any): builder is QueryBuilder {
return builder instanceof QueryBuilder
}
export function toQuery(query: QueryTypes.Query|QueryBuilder): QueryTypes.Query {
if (isQueryBuilder(query)) {
return query.toQuery()
}
return query
}
type DocumentMinimalReference = {
id: Types.DocumentId
}
export class QueryScopes {
constructor(private _defaultParentId?: string) {}
global(options: QueryTypes.MatchOptions = {}) {
return new ScopedQueryBuilder(null, { ...options, allowUnscopedQuery: true })
}
// Ancestors(matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
// Ancestors(rootDocumentId: Types.DocumentId, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
// Ancestors(arg1: QueryTypes.MatchOptions|Types.DocumentId, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder {
// let rootDocumentId = this._defaultParentId
// if (typeof arg1 === 'string') {
// rootDocumentId = arg1
// } else {
// matchOptions = arg1
// }
// if (!rootDocumentId) {
// throw new Error('Missing parentId')
// }
// const scope: QueryScope = {
// field: 'id',
// value: new QueryBuilder().where('id').is(rootDocumentId).select(d => d.a)
// }
// return new ScopedQueryBuilder(scope, matchOptions)
// }
children(matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
children(parent: DocumentMinimalReference, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
children(parentId: Types.DocumentId, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
children(arg1: QueryTypes.MatchOptions|Types.DocumentId|DocumentMinimalReference, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder {
let parentId = this._defaultParentId
if (typeof arg1 === 'string') {
parentId = arg1
} else if (arg1 && 'id' in arg1) {
parentId = arg1.id
} else {
matchOptions = arg1 as any
}
if (!parentId) {
throw new Error('Missing parentId')
}
const childrenCondition: QueryScope = {
field: 'p',
value: parentId
}
return new ScopedQueryBuilder(childrenCondition, matchOptions)
}
descendants(matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
descendants(ancestor: DocumentMinimalReference, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
descendants(ancestorId: Types.DocumentId, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder
descendants(arg1: QueryTypes.MatchOptions|Types.DocumentId|DocumentMinimalReference, matchOptions?: QueryTypes.MatchOptions): ScopedQueryBuilder {
let ancestorId = this._defaultParentId
if (typeof arg1 === 'string') {
ancestorId = arg1
} else if (arg1 && 'id' in arg1) {
ancestorId = arg1.id
} else {
matchOptions = arg1 as any
}
if (!ancestorId) {
throw new Error('Missing parentId')
}
const descendantCondition: QueryScope = {
field: 'a',
value: ancestorId
}
return new ScopedQueryBuilder(descendantCondition, matchOptions)
}
}
export const Scopes = new QueryScopes()
export type IScopes = typeof Scopes
type QueryBuilderOptions = { queryParameters?: Parameters } & QueryTypes.MatchOptions
class Parameters {
private _parameterCount = 0
private _parameters: QueryTypes.QueryParameters = []
add(value: string|number|boolean|(string|number|boolean)[]): QueryTypes.QueryParameterReference {
const nextParameterId = this._parameterCount ++
const parameter = { id: nextParameterId }
this._parameters[nextParameterId] = value
return parameter
}
toQueryParameters(): QueryTypes.QueryParameters {
return this._parameters
}
}
export class QueryBuilder {
private _expression: QueryTypes.MatchExpression
private _matchOptions: QueryTypes.MatchOptions
private _finalized = false
private _appendNextConditionAsOr = false
private _parameters: Parameters
constructor({ queryParameters, ...matchOptions }: QueryBuilderOptions = {}) {
this._expression = {
conditions: [],
}
this._matchOptions = matchOptions
if (!this._matchOptions.nowUtcTz) {
this._matchOptions.nowUtcTz = getNow().isoTz
}
this._parameters = queryParameters || new Parameters()
}
// TODO: Include clone with matching spec
//
// clone(): QueryBuilder {
// const clone = new QueryBuilder()
// clone._expression = JSON.parse(JSON.stringify(this._expression))
// clone._matchOptions = JSON.parse(JSON.stringify(this._matchOptions))
// clone._finalized = JSON.parse(JSON.stringify(this._finalized))
// clone._appendNextConditionAsOr = JSON.parse(JSON.stringify(this._appendNextConditionAsOr))
// clone._parameters = JSON.parse(JSON.stringify(this._parameters))
// return clone
// }
addParameter(value: string|number|boolean|(string|number|boolean)[]): QueryTypes.QueryParameterReference {
assertNotFinalized(this)
return this._parameters.add(value)
}
get finalized() {
return this._finalized
}
where(fieldPath: string): FieldCondition {
assertNotFinalized(this)
return new FieldCondition(fieldPath, this)
}
form(formId: string): QueryBuilder {
return this.where('f.id').is(formId)
}
forms(formIds: string[]): QueryBuilder {
return this.where('f.id').isOneOf(formIds)
}
stamp(stampTypeId: string) {
return this.form('stamp').where('data.type.id').is(stampTypeId)
}
stamps(stampTypeIds: string[]|undefined) {
let q = this.form('stamp')
if (stampTypeIds) {
return q.where('data.type.id').isOneOf(stampTypeIds)
} else {
return q
}
}
stamped(stampTypeId: string|string[]) {
const stampIdsToCheck = Array.isArray(stampTypeId) ? stampTypeId : [stampTypeId]
return this.where('stamps.type.id').isOneOf(stampIdsToCheck)
}
get or(): QueryBuilder {
assertNotFinalized(this)
this._appendNextConditionAsOr = true
return this
}
subQuery(subQueryDefinitionFunction: SubQueryDefinitionFunction): QueryBuilder {
assertNotFinalized(this)
const subQueryBuilder = new QueryBuilder({ queryParameters: this._parameters })
subQueryDefinitionFunction(subQueryBuilder)
const subQuery = subQueryBuilder.toQuery()
if (subQuery.graph.conditions.length === 0) {
return this
}
if (isAndExpression(subQuery.graph) && isAndExpression(this._expression)) {
this._expression.conditions.push(...subQuery.graph.conditions)
} else {
return this.appendCondition(subQuery.graph)
}
return this
}
appendCondition(condition: QueryTypes.Condition) {
assertNotFinalized(this)
if (!('operator' in this._expression)) {
if (this._appendNextConditionAsOr) {
this._expression = {
operator: 'OR',
conditions: this._expression.conditions
}
this._appendNextConditionAsOr = false
} else {
if (this._expression.conditions.length > 0) {
this._expression = {
operator: 'AND',
conditions: this._expression.conditions
}
}
}
this._expression.conditions.push(condition)
} else if (this._appendNextConditionAsOr) {
if (this._expression.operator === 'AND') {
const previousANDExpression = this._expression
this._expression = {
operator: 'OR',
conditions: [
previousANDExpression,
condition,
]
}
} else {
this._expression.conditions.push(condition)
}
this._appendNextConditionAsOr = false
} else if (this._expression.operator === 'OR') {
const lastCondition = this._expression.conditions[this._expression.conditions.length - 1]
if (QueryTypes.isFieldMatchNode(lastCondition)) {
const newAndCondition: QueryTypes.MatchExpressionMultiMatch = {
operator: 'AND',
conditions: [
lastCondition,
condition
]
}
this._expression.conditions[this._expression.conditions.length - 1] = newAndCondition
} else {
lastCondition.conditions.push(condition)
}
} else {
// this._expression.operator === 'AND'
this._expression.conditions.push(condition)
}
return this
}
toQuery(): QueryTypes.Query {
this._finalized = true
return { graph: this._expression, parameters: this._parameters.toQueryParameters(), options: this._matchOptions }
}
}
export class ScopedQueryBuilder extends QueryBuilder {
private _scopeField: string
private _scopeParameter: QueryTypes.QueryParameterReference
constructor(scope: QueryScope, { allowUnscopedQuery = false, ...matchOptions }: ScopedQueryMatchOptions = {} ) {
super(matchOptions)
if (scope) {
this._scopeField = scope.field
this._scopeParameter = this.addParameter(scope.value)
} else if (!allowUnscopedQuery) {
throw new Error('Missing query scope. Either supply it or use option allowUnscopedQuery option')
}
}
toQuery(): QueryTypes.Query {
if (this._scopeField) {
const query = super.toQuery()
ensureFieldMatchesParameter(query, this._scopeField, this._scopeParameter)
return query
} else {
return super.toQuery()
}
}
}
function ensureFieldMatchesParameter(query: QueryTypes.Query, field: string, parameter: QueryTypes.QueryParameterReference) {
const { graph } = query
const newFieldCondition: QueryTypes.FieldMatchNode = {
field,
term: parameter
}
if (isAndExpression(graph)) {
graph.conditions.unshift(newFieldCondition)
if (graph.conditions.length > 1) {
(graph as QueryTypes.MatchExpressionMultiMatch).operator = 'AND'
}
} else {
query.graph = {
operator: 'AND',
conditions: [
newFieldCondition,
{
operator: 'OR',
conditions: graph.conditions
}
]
}
}
}
class FieldCondition {
constructor(private _fieldPath: string, private _builder: QueryBuilder) {}
is(value: number|string|boolean) {
const parameter = this._builder.addParameter(value)
// const parameter = this._builder.addParameter(value.toString())
return this._isParameter(parameter)
}
isOneOf(values: (number|string|boolean)[]) {
const nonNullValues = values.filter(v => v != null)
const parameter = this._builder.addParameter(nonNullValues)
return this._isParameter(parameter)
}
contains(value: string) {
const parameter = this._builder.addParameter('*' + value + '*')
return this._isParameter(parameter)
}
containsOneOf(values: string[]) {
const parameter = this._builder.addParameter(values.map(v => '*' + v + '*'))
return this._isParameter(parameter)
}
startsWith(value: string) {
const parameter = this._builder.addParameter(value + '*')
return this._isParameter(parameter)
}
startsWithOneOf(values: string[]) {
const parameter = this._builder.addParameter(values.map(v => v + '*'))
return this._isParameter(parameter)
}
endsWith(value: string) {
const parameter = this._builder.addParameter('*' + value)
return this._isParameter(parameter)
}
endsWithOneOf(values: string[]) {
const parameter = this._builder.addParameter(values.map(v => '*' + v))
return this._isParameter(parameter)
}
private _isParameter(parameter: QueryTypes.QueryParameterReference) {
const condition = {
field: this._fieldPath,
term: parameter
}
return this._builder.appendCondition(condition)
}
isInRange(minValue: number|string, maxValue: number|string) {
const termMin = minValue == null ? '*' : minValue.toString()
const termMax = maxValue == null ? '*' : maxValue.toString()
const minVariable = this._builder.addParameter(termMin)
const maxVariable = this._builder.addParameter(termMax)
const condition: QueryTypes.Condition = {
field: this._fieldPath,
term_min: minVariable,
term_max: maxVariable,
}
return this._builder.appendCondition(condition)
}
isTruthy() {
return this.isInRange('*', '*')
}
}
function assertNotFinalized(object: { finalized: boolean } ) {
if (object.finalized) {
throw new Error('Cannot perform this operation when already finalized')
}
}
function isAndExpression(expression: QueryTypes.MatchExpression) {
if (!('operator' in expression)) {
return true
}
if (expression.operator === 'OR') {
return false
} else {
return true
}
}
// function isOrExpression(expression: QueryTypes.MatchExpression) {
// return !isAndExpression(expression)
// }
// function assert(condition: any, message?: string) {
// if (!condition) {
// throw new Error(message||'Assertion failed')
// }
// }
Query String Parser
The initial form parser is defined in PEG and parsed by PEG.js
query-parser.peg
peg
/*
* Lucene Query Grammar for PEG.js
* ========================================
*
* Original version: https://github.com/thoward/lucene-query-parser.js
* Slightly modified by Zachary Tong, 03/05/2013. Add prefix matching to fields
*
* This grammar supports many of the constructs contained in the Lucene Query Syntax.
*
* Supported features:
* - conjunction operators (AND, OR, ||, &&, NOT)
* - prefix operators (+, -)
* - quoted values ("foo bar")
* - named fields (foo:bar)
* - range expressions (foo:[bar TO baz], foo:{bar TO baz})
* - proximity search expressions ("foo bar"~5)
* - boost expressions (foo^5, "foo bar"^5)
* - fuzzy search expressions (foo~, foo~0.5)
* - parentheses grouping ( (foo OR bar) AND baz )
* - field groups ( foo:(bar OR baz) )
*
* The grammar will create a parser which returns an AST for the query in the form of a tree
* of nodes, which are dictionaries. There are three basic types of expression dictionaries:
*
* A node expression generally has the following structure:
*
* {
* 'left' : dictionary, // field expression or node
* 'operator': string, // operator value
* 'right': dictionary, // field expression OR node
* 'field': string // field name (for field group syntax) [OPTIONAL]
* }
*
*
* A field expression has the following structure:
*
* {
* 'field': string, // field name
* 'term': string, // term value
* 'prefix': string // prefix operator (+/-) [OPTIONAL]
* 'boost': float // boost value, (value > 1 must be integer) [OPTIONAL]
* 'similarity': float // similarity value, (value must be > 0 and < 1) [OPTIONAL]
* 'proximity': integer // proximity value [OPTIONAL]
* }
*
*
* A range expression has the following structure:
*
* {
* 'field': string, // field name
* 'term_min': string, // minimum value (left side) of range
* 'term_max': string, // maximum value (right side) of range
* 'inclusive': boolean // inclusive ([...]) or exclusive ({...})
* }
*
* Other Notes:
*
* - For any field name, unnamed/default fields will have the value "<implicit>".
* - Wildcards (fo*, f?o) and fuzzy search modifiers (foo~.8) will be part of the term value.
* - Escaping is not supported and generally speaking, will break the parser.
* - Conjunction operators that appear at the beginning of the query violate the logic of the
* syntax, and are currently "mostly" ignored. The last element will be returned.
*
* For example:
* Query: OR
* Return: { "operator": "OR" }
*
* Query: OR AND
* Return: { "operator": "AND" }
*
* Query: OR AND foo
* Return: { "left": { "field": "<implicit>", "term": "foo" } }
*
* To test the grammar, use the online parser generator at http://pegjs.majda.cz/online
*
*/
{
const REGEXP_ISO_DATE = /(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(.[0-9]+)?(Z)?/
}
start
= _* node:node+
{
return node[0];
}
/ _*
{
return {};
}
/ EOF
{
return {};
}
node
/*
= operator:operator_exp EOF
{
return {
'operator': operator
};
}
/ operator:operator_exp right:node
{
return right;
}
/
*/
= left:group_exp operator:operator_exp* right:node*
{
var node= {
'left':left
};
var right =
right.length == 0
? null
: right[0]['right'] == null
? right[0]['left']
: right[0];
if (right != null)
{
node['operator'] = operator==''? '<implicit>' : operator[0];
node['right'] = right;
}
return node;
}
group_exp
= field_exp:field_exp _*
{
return field_exp;
}
/ paren_exp
paren_exp
= "(" node:node+ ")" _*
{
return node[0];
}
field_exp
= op:prefix_operator_exp? fieldname:fieldname? range:range_operator_exp
{
range['field'] =
fieldname == ''
? "<implicit>"
: fieldname;
range['fieldPrefix'] = op;
return range;
}
/ op:prefix_operator_exp? fieldname:fieldname node:paren_exp
{
node['field'] = fieldname;
node['fieldPrefix'] = op;
return node;
}
/ op:prefix_operator_exp? fieldname:fieldname? term:term
{
var fieldexp = {
field:
fieldname == ''
? "<implicit>"
: fieldname,
fieldPrefix: op
};
for(var key in term)
fieldexp[key] = term[key];
return fieldexp;
}
fieldname
= fieldname:chainedFieldIdentifier ":" _*
{
return fieldname;
}
chainedFieldIdentifier
= head:fieldIdentifier "." tail:chainedFieldIdentifier
{
return head + '.' + tail;
}
/ fieldIdentifier
fieldIdentifier
= initialCharacter:[a-z_]i otherCharacters:[a-z0-9_]i* { return initialCharacter+otherCharacters.join(''); }
term
= op:prefix_operator_exp? term:quoted_term proximity:proximity_modifier? boost:boost_modifier? _*
{
var result = {
'term': term.term,
'termQuotes': term.termQuotes
};
if('' != proximity)
{
result['proximity'] = proximity;
}
if('' != boost)
{
result['boost'] = boost;
}
if('' != op)
{
result['termPrefix'] = op;
}
return result;
}
/ op:prefix_operator_exp? term:unquoted_term similarity:fuzzy_modifier? boost:boost_modifier? _*
{
var result = {
'term': term.term,
'termQuotes': term.termQuotes
};
if('' != similarity)
{
result['similarity'] = similarity;
}
if('' != boost)
{
result['boost'] = boost;
}
if('' != op)
{
result['termPrefix'] = op;
}
return result;
}
unquoted_term
= term:[0-9\-T\:Z]+ & { return REGEXP_ISO_DATE.test(term.join('')) } modifiers:[a-z0-9\/\+\-]i*
{
return {
term: term.join('') + modifiers.join(''),
termQuotes: 'none'
}
}
/ !operator term:[^: \t\r\n\f\{\}()"'^~\[\]]+
{
return {
term: term.join(''),
termQuotes: 'none'
};
}
quoted_term
= quoted_term_double / quoted_term_single
quoted_term_single
= "'" term:[^']+ "'"
{
return {
term: term.join(''),
termQuotes: 'single'
};
}
quoted_term_double
= '"' term:[^"]+ '"'
{
return {
term: term.join(''),
termQuotes: 'double'
};
}
proximity_modifier
= '~' proximity:int_exp
{
return proximity;
}
boost_modifier
= '^' boost:decimal_or_int_exp
{
return boost;
}
fuzzy_modifier
= '~' fuzziness:decimal_exp?
{
return fuzziness == '' ? 0.5 : fuzziness;
}
decimal_or_int_exp
= decimal_exp
/ int_exp
decimal_exp
= '0.' val:[0-9]+
{
return parseFloat("0." + val.join(''));
}
int_exp
= val:[0-9]+
{
return parseInt(val.join(''));
}
range_operator_exp
= '[' term_min:unquoted_term _* 'TO' _+ term_max:unquoted_term ']'
{
return {
'term_min': term_min.term,
'term_max': term_max.term,
'inclusive': true
};
}
/ '{' term_min:unquoted_term _* 'TO' _+ term_max:unquoted_term '}'
{
return {
'term_min': term_min.term,
'term_max': term_max.term,
'inclusive': false
};
}
operator_exp
= _* !double_operator operator:operator _+
{
return operator;
}
/ _* operator:operator EOF
{
return operator;
}
operator
= 'AND NOT' { return 'NOT' }
/ 'OR'
/ 'AND'
/ 'NOT'
/ '||' { return 'OR'; }
/ '&&' { return 'AND'; }
double_operator
= operator _ operator
prefix_operator_exp
= _* operator:prefix_operator
{
return operator;
}
prefix_operator
= '+'
/ '-'
_ "whitespace"
= [ \t\r\n\f]+
EOF
= !.
The initial parser defined outputs a simple left/right graph which is post-processed by a query refiner.
query-refined.spec.js and refining.ts
js
import test from 'ava'
import { parse, lowlevelParse } from '../../src/model/query/parse.js'
import { QueryRefiner } from '../../src/model/query/refining.js'
test('Refine a one-term graph', t => {
const fullyParsedAndRefined = parse('f.id:FORM')
const parsed = parseWithoutRefine('f.id:FORM')
const { graph, parameters } = QueryRefiner.process(parsed)
t.deepEqual(fullyParsedAndRefined.graph, graph)
t.deepEqual(fullyParsedAndRefined.parameters, parameters)
t.falsy(graph.operator)
t.is(graph.conditions.length, 1)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
t.falsy('left' in graph.conditions[0], 'Should not have a left condition anymore')
})
test('Not refining twice', t => {
const parsed = parseWithoutRefine('f.id:FORM')
const { graph } = QueryRefiner.process(parsed)
t.throws(() => QueryRefiner.process(graph))
})
test('parse and refine a negated field', t => {
const { graph } = parse('data.name:hallo NOT test')
const condition = graph.conditions[1]
t.is(condition.termPrefix, '-')
})
test('parse and refine a negated field with name', t => {
const { graph } = parse('data.name:hallo NOT field:test')
console.dir(graph, { depth: null })
const condition = graph.conditions[1]
t.is(condition.termPrefix, '-')
})
test('parse and refine a negated field with name combined with NOT instead of AND', t => {
const { graph } = parse('data.name:hallo NOT -test')
console.dir(graph, { depth: null })
const condition = graph.conditions[1]
t.not(condition.fieldPrefix, '-')
t.not(condition.termPrefix, '-')
})
test('refine a negated field (-) and NOT operator', t => {
const { graph } = parse('data.name:hallo NOT -test2')
t.is(graph.operator, 'AND')
const condition = graph.conditions[1]
t.is(condition.fieldPrefix, '')
t.not(condition.termPrefix, '-')
})
test('Not refining with implicit operators', t => {
t.throws(() => QueryRefiner.process(
{
left: {
field: 'f.id',
fieldPrefix: null,
term: 'FORM',
termQuotes: 'none',
similarity: null,
boost: null,
termPrefix: null,
},
operator: '<implicit>',
right: {
field: 'data.test',
fieldPrefix: null,
term: 'a',
termQuotes: 'none',
similarity: null,
boost: null,
termPrefix: null,
},
}),
)
})
test('Not refining unknown nodes', t => {
t.throws(() => QueryRefiner.process({ left: 'a string' }))
})
test('Not refining with non terms', t => {
t.throws(() => QueryRefiner.process(
{
left: {
field: 'f.id',
fieldPrefix: null,
},
}),
)
})
test('Refine a two-term graph', t => {
const parsed = parseWithoutRefine('f.id:FORM p:PARENT')
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 2)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right)
})
test('Refine a three-term graph', t => {
const parsed = parseWithoutRefine('f.id:FORM p:PARENT data.name:NAME')
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right.left)
deepEqualCondition(t, parameters, graph.conditions[2], parsed.right.right)
})
test('Refine a graph with a range', t => {
const parsed = parseWithoutRefine('data.age:[* TO *]')
const { graph, parameters } = QueryRefiner.process(parsed)
console.dir(graph, { depth: null })
t.is(graph.conditions.length, 1)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
})
test('Refine a large graph using just AND', t => {
const conditions = buildConditions(100)
const parsed = parseWithoutRefine(conditions.join(' '))
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, conditions.length)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right.left)
deepEqualCondition(t, parameters, graph.conditions[2], parsed.right.right.left)
deepEqualCondition(t, parameters, graph.conditions[3], parsed.right.right.right.left)
})
test('Refine a large graph using just AND but braces', t => {
const leftConditions = buildConditions(10)
const rightConditions = buildConditions(5)
const parsed = parseWithoutRefine(`(${leftConditions.join(' ')}) AND (${rightConditions.join(' ')})`)
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, leftConditions.length + rightConditions.length)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left.left)
deepEqualCondition(t, parameters, graph.conditions[1], parsed.left.right.left)
deepEqualCondition(t, parameters, graph.conditions[2], parsed.left.right.right.left)
deepEqualCondition(t, parameters, graph.conditions[3], parsed.left.right.right.right.left)
})
test('Refine a large graph using OR to combine two queries with only AND in each', t => {
const leftConditions = buildConditions(3)
const rightConditions = buildConditions(3)
const parsedRight = parseWithoutRefine(rightConditions.join(' '))
const parsed = parseWithoutRefine(`(${leftConditions.join(' ')}) OR (${rightConditions.join(' ')})`)
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'OR')
t.is(graph.conditions.length, 2)
t.is(graph.conditions[0].conditions.length, leftConditions.length)
t.is(graph.conditions[1].conditions.length, rightConditions.length)
t.is(graph.conditions[0].operator, 'AND')
deepEqualCondition(t, parameters, graph.conditions[0].conditions[0], parsed.left.left)
t.is(graph.conditions[1].operator, 'AND')
deepEqualCondition(t, parameters, graph.conditions[1].conditions[0], parsedRight.left)
})
test('Refine AND paired with some ORs', t => {
const parsed = parseWithoutRefine('f.id:FORM p:PARENT AND (data.name:A OR data.name:B OR data.name:C)')
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
// console.dir(matches, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right.left)
deepEqualCondition(t, parameters, graph.conditions[2].operator, 'OR')
t.is(graph.conditions[2].conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[2].conditions[0], parsed.right.right.left)
deepEqualCondition(t, parameters, graph.conditions[2].conditions[1], parsed.right.right.right.left)
deepEqualCondition(t, parameters, graph.conditions[2].conditions[2], parsed.right.right.right.right)
})
test('Refine complicated pairing', t => {
const parsed = parseWithoutRefine('(f.id:FORM OR (p:PARENT OR p:PARENT_B)) AND (data.name:A OR data.name:B OR ((data.name:C AND data.field:FOO data.fieldB:FOO2) OR data.more:BAR))')
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
// console.dir(matches, { depth: null })
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 2)
t.is(graph.conditions[0].operator, 'OR')
t.is(graph.conditions[0].conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[0].conditions[0], parsed.left.left)
deepEqualCondition(t, parameters, graph.conditions[0].conditions[1], parsed.left.right.left)
deepEqualCondition(t, parameters, graph.conditions[0].conditions[2], parsed.left.right.right)
t.is(graph.conditions[1].operator, 'OR')
t.is(graph.conditions[1].conditions.length, 4)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[0], parsed.right.left)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[1], parsed.right.right.left)
t.is(graph.conditions[1].conditions[2].operator, 'AND')
t.is(graph.conditions[1].conditions[2].conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[2].conditions[0], parsed.right.right.right.left.left)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[2].conditions[1], parsed.right.right.right.left.right.left)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[2].conditions[2], parsed.right.right.right.left.right.right)
deepEqualCondition(t, parameters, graph.conditions[1].conditions[3], parsed.right.right.right.right)
})
test('Refine a two term graph using NOT', t => {
const parsed = parseWithoutRefine('f.id:FORM AND NOT p:PARENT')
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 2)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
t.falsy(graph.conditions[0].termPrefix)
t.is(graph.conditions[1].termPrefix, '-')
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right)
})
test('Refine a three term graph using AND and NOT', t => {
const parsed = parseWithoutRefine('f.id:FORM AND NOT p:PARENT AND a:ANCESTOR')
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
t.is(graph.conditions[1].termPrefix, '-')
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right.left)
deepEqualCondition(t, parameters, graph.conditions[2], parsed.right.right)
})
test('Refine another three term graph using AND and two NOTs', t => {
const parsed = parseWithoutRefine('f.id:FORM AND NOT p:PARENT AND NOT a:ANCESTOR')
// console.dir(parsed, { depth: null })
const { graph, parameters } = QueryRefiner.process(parsed)
t.is(graph.operator, 'AND')
t.is(graph.conditions.length, 3)
deepEqualCondition(t, parameters, graph.conditions[0], parsed.left)
t.is(graph.conditions[1].termPrefix, '-')
deepEqualCondition(t, parameters, graph.conditions[1], parsed.right.left)
t.is(graph.conditions[2].termPrefix, '-')
deepEqualCondition(t, parameters, graph.conditions[2], parsed.right.right)
})
test('Refine another three term graph using AND and NOTs with parentheses', t => {
t.throws(() => parseWithoutRefine('f.id:FORM NOT (p:PARENT AND a:ANCESTOR)'))
// console.dir(parsed, { depth: null })
})
function buildConditions(count) {
const conditions = []
for (let i = 0; i < count; i += 1) {
conditions.push(`data.field${i}:${i}`)
}
return conditions
}
function parseWithoutRefine(query) {
return lowlevelParse(query)
}
function deepEqualCondition(t, parameters, node, rawNode) {
const _node = clone(node)
const _rawNode = clone(rawNode)
if (_node.term) {
const processedTerm = parameters[_node.term.id]
const rawTerm = _rawNode.term
t.deepEqual(processedTerm, rawTerm)
}
if (_node.term_min) {
const processedTerm = parameters[_node.term_min.id]
const rawTerm = _rawNode.term_min
t.deepEqual(processedTerm, rawTerm)
}
if (_node.term_max) {
const processedTerm = parameters[_node.term_max.id]
const rawTerm = _rawNode.term_max
t.deepEqual(processedTerm, rawTerm)
}
delete _node.term
delete _node.term_min
delete _node.term_max
delete _rawNode.term
delete _rawNode.term_min
delete _rawNode.term_max
t.deepEqual(_node, _rawNode)
}
function clone(object) {
return JSON.parse(JSON.stringify(object))
}
ts
import { assert } from '@aeppic/shared/assert'
import { Condition, ExplicitOperator, isIntermediaryNode, isLeafNode, MatchExpression, Query, QueryGraphNode, QueryParameters, RawFieldMatchNode, RawParsedOperator } from './types.js'
export class QueryRefiner {
static process(queryGraph: QueryGraphNode): Query {
if ('conditions' in queryGraph) {
throw new Error('Already optimized')
}
// console.dir(queryGraph, { depth: null })
const parameters: QueryParameters = {}
const matchExpression = QueryRefiner._refineNode(queryGraph, parameters)
// console.dir(matchExpression, { depth: null })
return {
graph: matchExpression,
parameters,
}
}
private static _refineNode(node: QueryGraphNode, parameters: QueryParameters): MatchExpression {
if (!node) {
return null
}
if (typeof node !== 'object') {
throw new Error('node must be an object')
}
if (isIntermediaryNode(node)) {
if (node.operator === '<implicit>') {
throw new Error('<implicit> operator not allowed in optimization needs to be replaced first')
}
const leftConditions = this._refineNode(node.left, parameters)
const rightConditions = this._refineNode(node.right, parameters)
return joinMatchExpressions(leftConditions, rightConditions, node.operator)
} else {
assert(isLeafNode(node))
const condition = toFinalCondition(node, parameters)
return { conditions: [condition] }
}
}
}
export function toFinalCondition(node: RawFieldMatchNode, parameters: QueryParameters) {
if ('term' in node) {
const newParameterId = Object.keys(parameters).length
parameters[newParameterId] = node.term
return {
...node as any, term: { id: newParameterId }
}
} else if ('term_min' in node && 'term_max' in node) {
const newMinParameterId = Object.keys(parameters).length
parameters[newMinParameterId] = node.term_min
const newMaxParameterId = Object.keys(parameters).length
parameters[newMaxParameterId] = node.term_max
return {
...node as any, term_min: { id: newMinParameterId }, term_max: { id: newMaxParameterId }
}
}
throw new Error('Not implemented')
}
export function joinMatchExpressions(left: MatchExpression, right: MatchExpression, operator: RawParsedOperator = 'AND') {
const conditions = []
if (left) {
conditions.push(...readConditions(left, operator))
}
if (right) {
conditions.push(...readConditions(right, operator))
}
if (conditions.length <= 1) {
return { conditions }
} else {
return { operator, conditions }
}
}
function readConditions(match: MatchExpression, currentOperator: RawParsedOperator): Condition[] {
if (match.conditions.length <= 1) {
return match.conditions
}
assert('operator' in match)
if ('operator' in match) {
if (match.operator === currentOperator) {
return match.conditions
} else {
return [match]
}
}
}
Query Matcher
Takes a query graph and allows to test documents against it to find out whether a document matches the query or not.
query-matcher.spec.js and match.ts
js
'use strict'
const test = require('tape')
// const util = require('util')
const { Querying: { QueryMatcher, GlobalQueryCache, Scopes, parseQueryAndFilter, toSafeFieldAccess, identifySubGraphsInQuery, isQuery } } = require('../../dist/aeppic.cjs')
const SIMPLE_QUERY = 'p:2323'
const QUERY = 'data.name:hallo data.age:1'
const ALL_FIELDS_QUERY = '*hallo*'
const FILTER = 'f.id:form p:2323'
const FILTER_ONE_TERM = 'f.id:form'
// const QUERY_SIMPLE_OR = 'data.name:hallo OR data.age:999'
// const QUERY_SIMPLE_OR_NESTED = 'data.name:hallo AND (data.age:2 OR data.age:999)'
// const QUERY_SIMPLE_OR_DEEPLY_NESTED = '(data.name:hallo OR data.name:hallo2) AND (data.age:2 OR data.age:999)'
const QUERY_SIMPLE_SUFFIX_WILDCARD = 'data.name:Hal*'
const QUERY_SIMPLE_FULL_WILDCARD = 'data.name:*hal*'
// const QUERY_WITH_OUTER_BRACKETS_NESTED = '(a:1 f.id:form OR a:2 data.name:hallo)'
const MATCHING_OBJECT = {
p: '2323',
f: {
id: 'form'
},
data: {
name: 'hallo',
age: 1,
}
}
const MATCHING_OBJECT_WITH_ARRAY = {
p: '2323',
f: {
id: 'form'
},
data: {
name: ['not-hallo', 'hallo'],
age: 1,
}
}
const MISMATCHING_OBJECT = {
test: 'some other value',
f: {
id: 'form'
},
data: {
name: 'world'
}
}
const UPPERCASE_TEST = {
test: 'some other value',
data: {
name: 'This is a world that truly needs some ARS and RLD'
}
}
const UPPERCASE_TEST_NO_MATCH = {
test: 'some other value',
data: {
name: 'I say ars is not really a sensible word'
}
}
test('Build a one-term query matcher', (t) => {
const matcher = buildQueryMatcher('data.field:1')
t.ok(matcher, 'can build a function')
t.equal(Object.keys(matcher.query.parameters).length, 1)
t.end()
})
test('Safe Field Access Rewrite', (t) => {
t.equal(toSafeFieldAccess('data.icon.dataUrl'), 'document.data.icon?.dataUrl')
t.equal(toSafeFieldAccess('id'), 'document.id')
t.equal(toSafeFieldAccess('data.f'), 'document.data.f')
t.equal(toSafeFieldAccess('data.f.text'), 'document.data.f?.text')
t.end()
})
test('build a query test function', (t) => {
t.plan(3)
const fn = buildQueryMatchFunction(QUERY, FILTER)
t.ok(fn, 'can build a function')
t.ok(fn(MATCHING_OBJECT), 'matching the object')
t.notOk(fn(MISMATCHING_OBJECT), 'not matching the object')
})
test('build a query test function with a single term component', (t) => {
t.plan(3)
const fn = buildQueryMatchFunction(QUERY, FILTER_ONE_TERM)
t.ok(fn, 'can build a function')
t.ok(fn(MATCHING_OBJECT), 'matching the object')
t.notOk(fn(MISMATCHING_OBJECT), 'not matching the object')
})
test('Reuse a query if the shape of the query is identical', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('data.name:HALLO')
const matcher2 = buildQueryMatcher('data.name:BYE-BYE')
const fn1 = matcher1.shapeId
const fn2 = matcher2.shapeId
t.equal(fn1, fn2, 'Tester should use exact same shape')
t.end()
})
test('Test a document against an empty query', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('')
t.ok(matcher1.match({ data: { name: 'test' } }), 'Empty match function matches all documents')
t.end()
})
test('Different shape is query is identical but filter isnt', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('data.name:HALLO')
const matcher2 = buildQueryMatcher('data.name:BYE-BYE', 'data.someOtherField:"NOTHING_SPECIAL"')
t.notEqual(matcher1.shapeId, matcher2.shapeId, 'Tester should use different same shape')
t.end()
})
test('reusing a query test function vs re-building it', (t) => {
GlobalQueryCache.reset()
measure(t, 'Always a new matcher', (i) => {
const match = buildQueryMatchFunction(QUERY, `data.age:[1 TO ${1 + i}]`)
match(MATCHING_OBJECT)
})
measure(t, 'Same matcher', () => {
const match = buildQueryMatchFunction(QUERY, 'data.age:[1 TO 1]')
match(MATCHING_OBJECT)
})
measure(t, 'Always a new matcher using query builder', (i) => {
const q = Scopes.global().where('data.age').isInRange(1, 1 + i).toQuery()
const matcher = new QueryMatcher(q)
matcher.match(MATCHING_OBJECT)
})
t.end()
})
test('build a query test function with DIFFERENT shape over and over again', (t) => {
GlobalQueryCache.reset()
measure(t, 'Different Shape builds', (i) => {
buildQueryMatchFunction(QUERY, 'data.something:"x"', `data.field${i}:${i} AND data.otherField:${i} OR data.andAnotherField1:${i} AND data.andAnotherField2:${i} data.andAnotherField3:${i}`)
})
t.end()
})
test('build a query test function with SAME shape over and over again', (t) => {
GlobalQueryCache.reset()
measure(t, 'Same Shape builds', (i) => {
buildQueryMatchFunction(QUERY, 'data.something:"x"', `data.field:${i} AND data.otherField:${i} OR data.andAnotherField1:${i} AND data.andAnotherField2:${i} data.andAnotherField3:${i}`)
})
t.end()
})
test('Use Query Tester against document', (t) => {
const testFunction = buildQueryMatchFunction(QUERY)
measure(t, 'Test matching document', () => {
testFunction(MATCHING_OBJECT)
})
t.end()
})
function measure(t, name, functionToTest) {
const WARMUP_COUNT = 100
const COUNT = 10000
for (let i = 0; i < WARMUP_COUNT; i += 1) {
functionToTest(i + 1234567)
}
const start = process.hrtime.bigint()
for (let i = 0; i < COUNT; i += 1) {
functionToTest(i)
}
const end = process.hrtime.bigint()
const diffInNanoseconds = end - start
const totalDurationInMs = Number(diffInNanoseconds / BigInt(1000000))
const durationInMsPerCall = totalDurationInMs / COUNT
const COMPARISON_TIME_FRAME_IN_MS = 1000
const NUMBER_OF_CALLS_WE_CAN_GET_DONE_IN_DESIRED_TIME =
Math.floor(COMPARISON_TIME_FRAME_IN_MS / durationInMsPerCall)
t.comment(`${name}: ${durationInMsPerCall}ms/call - ${totalDurationInMs}ms`)
t.comment(`Maximum number of calls we can perform in ${COMPARISON_TIME_FRAME_IN_MS}ms is ${NUMBER_OF_CALLS_WE_CAN_GET_DONE_IN_DESIRED_TIME}`)
}
test('simple query with empty filter', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(SIMPLE_QUERY, '')
// debugPrint(tester.queries)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the object')
})
test('simple query with empty filter', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(SIMPLE_QUERY, '')
// debugPrint(tester.queries)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the object')
})
test('query tester handles simple SUFFIX WILDCARD queries', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_SUFFIX_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the mismatching object')
})
test('query tester handles simple SUFFIX WILDCARD queries for arrays too', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_SUFFIX_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.ok(match(MATCHING_OBJECT_WITH_ARRAY), 'matching the object')
})
test('query tester handles simple FULL WILDCARD queries for arrays too', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_FULL_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.ok(match(MATCHING_OBJECT_WITH_ARRAY), 'matching the object')
})
test('query tester handles arrays inside field expressions in documents', (t) => {
const match = buildQueryMatchFunction('stamps.id:stamp-a OR data.refs.text:A*')
t.ok(match({ data: { refs: [{ text: 'atest' }] } }))
t.ok(match({ data: { refs: { text: 'abc' } } }))
t.ok(match({ data: { refs: [{ text: 'xyz' }, { text: 'abc' }] } }))
t.ok(match({ stamps: [{ id: 'stamp-a' }], data: { refs: { text: 'abc' } } }))
t.ok(match({ stamps: [{ id: 'stamp-b' }], data: { refs: { text: 'abc' } } }))
t.notOk(match({ stamps: [{ id: 'stamp-b' }], data: { refs: { text: 'SOMETHING' } } }))
t.notOk(match({ stamps: [{ id: 'stamp-c' }], data: { refs: { text: 'NOTHING' } } }))
t.end()
})
test('query tester looking for truthy with simple *', (t) => {
const match = buildQueryMatchFunction('data.a:*')
t.ok(match({ data: { a: 1 } }))
t.ok(match({ data: { a: 'TEST' } }))
t.ok(match({ data: { a: { id: 'ref-a', text: 'test', v: '' } } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.notOk(match({ data: { a: '' } }))
t.end()
})
test('query tester looking for truthy with simple *', (t) => {
const match = buildQueryMatchFunction('data.a:[* TO *]')
t.ok(match({ data: { a: 1 } }))
t.ok(match({ data: { a: true } }))
t.ok(match({ data: { a: 'TEST' } }))
t.ok(match({ data: { a: { id: 'ref-a', text: 'test', v: '' } } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: false } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.notOk(match({ data: { a: '' } }))
t.end()
})
test('query tester looking for boolean true', (t) => {
const match = buildQueryMatchFunction('data.flag:true')
t.ok(match({ data: { flag: true } }))
t.notOk(match({ data: { flag: false } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: 1 } }))
t.notOk(match({ data: { a: '' } }))
t.notOk(match({ data: { a: 'test' } }))
t.notOk(match({ data: { a: { id: 'test', text: '', v: '' } } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.end()
})
test('query tester looking for boolean false', (t) => {
const match = buildQueryMatchFunction('data.flag:false')
t.notOk(match({ data: { flag: true } }))
t.ok(match({ data: { flag: false } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: 1 } }))
t.notOk(match({ data: { a: '' } }))
t.notOk(match({ data: { a: 'test' } }))
t.notOk(match({ data: { a: { id: 'test', text: '', v: '' } } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.end()
})
test('query tester can search in all fields with wildcard (including .text in refs)', (t) => {
const query = parseQueryAndFilter('A*', { searchAllFields: true })
const matcher = new QueryMatcher(query)
const match = (d) => matcher.match(d)
t.ok(match({ data: { f: 'a' } }))
t.ok(match({ data: { name: 'a' } }))
t.ok(match({ data: { f: ['b', 'a'] } }))
t.ok(match({ data: { f: ['b', 'abc'] } }))
t.ok(match({ data: { refs: [{ id: 'abc', text: 'atest' }] } }))
t.ok(match({ data: { refs: { id: 'abc', text: 'abc' } } }))
t.notOk(match({ data: { refs: { id: '', text: 'abc' } } }), 'Will NOT find refs without id value set')
t.ok(match({ data: { refs: [{ text: 'xyz' }, { id: 'a', text: 'abc' }] } }))
t.notOk(match({ data: { refs: [{ text: 'xyz' }, { id: 'abc' }] } }), 'Doesn\'t wildcard match in id')
t.ok(match({ data: { f: 'a' } }))
t.end()
})
test('query wildcard does NOT match internal word matches', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:allo*')
t.notOk(match(MATCHING_OBJECT), 'should not have matched the object (as it is hallo not allo)')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the mismatching object')
})
test('query wildcard with matches lowercase/uppercase content', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:ARS*')
t.ok(match(UPPERCASE_TEST), 'matching the UPPERCASE document')
t.ok(match(UPPERCASE_TEST_NO_MATCH), 'also matching the lowercase variant')
})
test('query wildcard with all lowercase matches both lowercase and uppercase content', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:ars*')
t.ok(match(UPPERCASE_TEST), 'matching the UPPERCASE document')
t.ok(match(UPPERCASE_TEST_NO_MATCH), 'matching the lowercase variant')
})
test('Query with simple NOT', (t) => {
t.plan(1)
const match = buildQueryMatchFunction('data.name:hallo NOT data.age:1')
t.notOk(match(
{
data: {
name: 'hallo',
age: 1,
}
}), 'not matching due to age')
})
test('Query with simple NOT of one term', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('p:parent data.name:hallo NOT data.age:1 AND data.text:frank')
t.ok(match(
{
p: 'parent',
data: {
name: 'hallo',
age: 2,
text: 'frank'
}
}), 'matching since age is different and text matches (NOT only applies to next term)')
t.notOk(match(
{
data: {
name: 'hallo',
age: 1,
text: 'frank'
}
}), 'not matching due to age')
})
test('Query with simple NOT of two terms', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('p:parent data.name:hallo NOT data.age:1 NOT data.text:frank')
t.ok(match(
{
p: 'parent',
data: {
name: 'hallo',
age: 2,
text: 'XYZ'
}
}), 'matching since age is different and text is different')
t.notOk(match(
{
data: {
name: 'hallo',
age: 5,
text: 'frank'
}
}), 'not matching due to text')
})
test('Query with nested NOT', (t) => {
t.plan(1)
t.throws(() => buildQueryMatchFunction('data.name:hallo NOT (data.age:1 data.text:frank)'))
})
test('query tester handles OR queries', (t) => {
t.plan(9)
const match = buildQueryMatchFunction('data.name:hallo OR data.age:999')
t.ok(match({ data: { name: 'hallo', age: 999 } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: 4 } }))
t.ok(match({ data: { name: 'test', age: 999 } }))
t.ok(match({ data: { name: 'test', age: 999 } }))
t.notOk(match({ data: { name: 'test', age: 9999 } }))
t.notOk(match({ data: { name: 'test' } }))
t.ok(match({ data: { name: 'hallo' } }))
})
test('query tester handles nested OR queries', (t) => {
t.plan(6)
const match = buildQueryMatchFunction('data.name:hallo AND (data.age:2 OR data.age:999)')
t.ok(match({ data: { name: 'hallo', age: 999 } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: 2 } }))
t.notOk(match({ data: { name: 'hallo', age: 1 } }))
t.notOk(match({ data: { name: 'hallo-x', age: 2 } }))
t.notOk(match({ data: { name: 'hallo-x', age: 999 } }))
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(6)
const match = buildQueryMatchFunction('data.name:hallo OR data.age:999 data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor BOTH conditions after OR (which have an implicit AND)')
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(7)
const match = buildQueryMatchFunction('(data.name:hallo OR data.age:999) data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor the last condition')
t.ok(match({ data: { name: 'hallo', age: 0, more: ['this-matches', 'different'] } }), 'more matches here as the suffix query sees the non-letter character in front as beginning of a token')
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(3)
const match = buildQueryMatchFunction('data.more:"m*"')
t.notOk(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'm*' } }))
t.notOk(match({ data: { name: 'hallo', age: 999, more: 'test-m*' } }))
})
test('query tester detects range queries', (t) => {
const FORM_AND_DATE_QUERY = 'f.id:FORM data.date:[* TO now+1d/d]'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY)
const oldDate = new Date(1800, 1, 1).toISOString()
const futureDate = new Date(new Date().getFullYear() + 1, 1, 1).toISOString()
t.ok(match({ f: { id: 'FORM' }, data: { date: oldDate } }))
t.notOk(match({ f: { id: 'FORM' }, data: { date: futureDate } }))
t.end()
})
test('query tester testing range of numbers', (t) => {
const FORM_AND_DATE_QUERY = 'f.id:FORM data.age:{10 TO 99}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY)
t.notOk(match({ f: { id: 'FORM' }, data: { age: 3 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 10 } }))
t.ok(match({ f: { id: 'FORM' }, data: { age: 11 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 99 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 100 } }))
t.notOk(match({ data: { age: 50, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester testing range of dates', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:[2012-08-01T05:00:00Z TO 2015-08-05T00:00:00Z]'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:01Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:01+01:00' } }))
t.end()
})
test('query tester testing range of dates (exclusive range)', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:{2012-08-01T05:00:00Z TO 2015-08-05T00:00:00Z}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:01Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:00-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-10T00:00:00+00:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:00' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2035-08-05T00:00:00+00:30' } }))
t.notOk(match({ data: { aDate: now, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester testing range of dates with rounding', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:{2012-08-01T05:00:00Z/d TO 2015-08-05T00:00:00Z}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:00+04:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01+04:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00+05:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T00:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T00:00:01Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+06:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-10T00:00:00+00:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:00' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2035-08-05T00:00:00+00:30' } }))
t.notOk(match({ data: { aDate: now, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester reusing shape', (t) => {
t.ok(compareShapes('p:PARENT_1', 'p:PARENT_2'))
t.ok(compareShapes('p:PARENT_1', 'p:PARENT_1'))
t.notOk(compareShapes('p:PARENT_1', 'a:PARENT_1'))
t.notOk(compareShapes('data.field:test*', 'data.field:test'), 'Wildcard queries should be handled differently')
t.ok(compareShapes('data.field:test*', 'data.field:*test'), 'Wildcard queries have same shape for now')
t.notOk(compareShapes('data.field:[* TO 4]', 'data.field:test'), 'range is different to normal terms')
t.ok(compareShapes('data.field:[* TO 4]', 'data.field:[6 TO 9]'), 'range on same field is comparable')
t.notOk(compareShapes('data.field:{* TO 4}', 'data.field:[6 TO 9]'), 'range on same field is NOT comparable with different inclusion boundaries')
t.ok(compareShapes('data.field:true', 'data.field:false'), 'boolean comparisons are same')
t.notOk(compareShapes('data.field:\'false\'', 'data.field:false'), 'boolean comparisons are different to string')
t.notOk(compareShapes('data.field:*', 'data.field:*dsd'), 'any field value is not same as prefix wildcard')
t.notOk(compareShapes('data.field:*', 'data.field:[* TO *]'), 'any field value is NOT the same as open range as termQuotes are not set by query parser')
t.end()
})
function compareShapes(query1, query2) {
const matcher1 = buildQueryMatcher(query1, '')
const matcher2 = buildQueryMatcher(query2, '')
if (matcher1.shapeId == null) {
throw new Error('matcher1 shape is null/undefined')
}
if (matcher2.shapeId == null) {
throw new Error('matcher2 shape is null/undefined')
}
return matcher1.shapeId === matcher2.shapeId
}
test('query reusing same match function hopefully', (t) => {
const matcher1 = buildQueryMatcher('data.field:A')
const matcher2 = buildQueryMatcher('data.field:B')
t.equal(matcher1.matchFunctionId, matcher2.matchFunctionId)
const matcher3 = buildQueryMatcher('data.field:B*')
t.notEqual(matcher1.matchFunctionId, matcher3.matchFunctionId)
t.end()
})
test('Simple OR query', (t) => {
const match = buildQueryMatchFunction('name:hallo OR age:999')
t.ok(match({ name: 'hallo' }))
t.ok(match({ age: 999 }))
t.ok(match({ name: 'hallo', age: 999 }))
t.ok(match({ name: 'xhallo', age: 999 }))
t.ok(match({ name: 'hallo', age: 9990 }))
t.notOk(match({ name: 'hallox', age: 9990 }))
t.notOk(match({ age: 2 }))
t.notOk(match({ name: 'x' }))
t.notOk(match({ name: 'x', data: { name: 'hallo' } }))
t.end()
})
test('Nested OR query', (t) => {
const match = buildQueryMatchFunction('(name:hallo title: ms) (country:NZ OR age:45 OR language:EN)')
t.ok(match({ name: 'hallo', title: 'ms', country: 'nz' }))
t.ok(match({ name: 'hallo', title: 'ms', age: 45 }))
t.ok(match({ name: 'hallo', title: 'ms', language: 'en' }))
t.ok(match({ name: 'hallo', title: 'ms', country: 'de', age: 45 }))
t.notOk(match({ name: 'hallo', title: 'mx', age: 95 }))
t.notOk(match({ name: 'hallo', title: 'ms', age: 95 }))
t.notOk(match({ name: 'hallox', title: 'ms', age: 95 }))
t.notOk(match({ name: 'hallox', title: 'ms' }))
t.notOk(match({ name: 'hallox', title: 'ms', country: 'DE' }))
t.end()
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(7)
const match = buildQueryMatchFunction('(data.name:hallo OR data.age:999) data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor the last condition')
t.ok(match({ data: { name: 'hallo', age: 0, more: ['this-matches', 'different'] } }), 'more matches here as the suffix query sees the non-letter character in front as beginning of a token')
})
test('query can use fluent api', (t) => {
const q = Scopes.global().where('data.age').isInRange(1, 2).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query match date range', (t) => {
const q = Scopes.global().where('data.age').isInRange(1, 2).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for open ranges', (t) => {
const q = Scopes.global().where('data.age').isInRange(1).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for isOneOf', (t) => {
const q = Scopes.global().where('data.age').isOneOf([1, 2]).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for isOneOf for strings', (t) => {
const q = Scopes.global().where('data.name').isOneOf(['something', 'hallo']).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('QueryMatcher requires a query', t => {
t.throws(() => QueryMatcher(null))
t.end()
})
function buildQueryMatcher(queryString, filterString) {
const query = parseQueryAndFilter(queryString, filterString)
return new QueryMatcher(query)
}
function buildQueryMatchFunction(queryString, filterString, now) {
const matcher = buildQueryMatcher(queryString, filterString, { nowUtcTz: now })
// console.log('QUERY', queryString, filterString)
// console.log(matcher.shapeId)
// console.dir(matcher.query)
return (d) => matcher.match(d)
}
// test('query tester prioritize', (t) => {
// t.plan(7)
// const match = buildQueryTestFunction(QUERY_SIMPLE_OR_NESTED, FILTER)
// debugPrint(tester.queries)
// t.equal(tester.queries.length, 2, 'should have detected 2 query sets')
// const q1 = tester.queries[0]
// t.equal(q1[0].field, 'data.name', 'name')
// t.equal(q1[1].field, 'data.age', 'age')
// t.equal(q1[1].term, '2', '2')
// const q2 = tester.queries[1]
// t.equal(q2[0].field, 'data.name', 'name')
// t.equal(q2[1].field, 'data.age', 'age')
// t.equal(q2[1].term, '999', '999')
// })
test('identify `root` parent in query', (t) => {
t.plan(2)
const { a, p } = identifySubGraphsInQuery('p:root')
t.deepEqual(a, [], 'No ancestor query term')
t.deepEqual(p, ['root'], 'A single parent query term')
})
test('identify `apps` and `system` ancestor part in OR query', (t) => {
t.plan(2)
const { a, p } = identifySubGraphsInQuery('a:apps OR a:system')
t.deepEqual(a, ['apps', 'system'], 'apps and system have been identified')
t.deepEqual(p, [], 'no parents have been queried')
})
test('identify `root` and `system` parent and ancestor part in query', (t) => {
t.plan(2)
// const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
const { a, p } = identifySubGraphsInQuery('p:root OR a:system')
t.deepEqual(a, ['system'], 'system was identified in ancestors')
t.deepEqual(p, ['root'], 'root was identified as parent')
})
test('identify ids and forms in query', (t) => {
t.plan(2)
// const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
const { ids, f } = identifySubGraphsInQuery('id:system OR f.id:form p:root OR a:system')
t.deepEqual(f, ['form'], 'form was identified')
t.deepEqual(ids, ['system'], 'system was identified')
})
test('identify `system` as id and `system` as ancestor part in query', (t) => {
t.plan(3)
const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
t.deepEqual(a, ['system'], 'system was identified in ancestors')
t.deepEqual(p, [], 'no parents where matched')
t.deepEqual(ids, ['system'], 'system was identified as direct reference')
})
ts
import { DateTime } from 'luxon'
import * as Types from '@aeppic/types'
import getNow from '@aeppic/shared/now'
// import { Form } from '../form'
// import resolveField from './resolve-field'
import { isIsoDate, parseDateRange } from './parse-date.js'
// import { joinMatchExpressions as joinParsedQueries } from './refining'
import { MatchOptions, MatchQueryFunction, isFieldMatchNode, FieldMatchNode, MatchExpression, Condition, Query, QueryParameters, GenericMatchQueryFunction, isQuery, QueryParameterValue } from './types.js'
import { parse } from './parse.js'
import { extractShapeFromQuery } from './shape.js'
import { QueryCache } from './cache.js'
import * as fields from './fields.js'
// type AccessInfo = {
// added: number
// lastAccess: number
// }
// type QueryTestFunctionCacheEntry = AccessInfo & {
// match: QueryTestFunction
// parsed: ParsedQuery
// }
// type QueryIdCacheEntry = AccessInfo & {
// id: number
// query: string
// }
// const TestFunctionsIds = new Map<string, QueryIdCacheEntry>()
// const TestFunctionsCache = new Map<string, QueryTestFunctionCacheEntry>()
const FIELD_MATCHERS = {
boolean: supportArrayMatches(fieldMatchesBoolean),
truthy: supportArrayMatches(fieldMatchesTruthy),
wildcard: supportArrayMatches(fieldMatchesWildcard),
range: supportArrayMatches(fieldMatchesRange),
term: supportArrayMatches(fieldMatchesTerm),
}
const DOCUMENT_ACCESS = 'document.'
const DATA_FIELD_PREFIX = 'data.'
const HELPERS = {
}
function MATCH_NOTHING() {return false}
MATCH_NOTHING.matchFunctionId = Symbol('Match NOTHING Matcher function')
const QUERY_FUNCTION_PARAMETER_NAMES = ['document', 'parameters', 'options']
type ShapeId = NonNullable<string>
export const GlobalQueryCache = new QueryCache<CompiledQuery>()
export function parseQueryAndFilter(queryString: string, filterStringOrOptions?: MatchOptions|string, specifiedOptions?: MatchOptions): Query {
let query: Query
let options: MatchOptions
if (!queryString || queryString.trim() === '') {
if (filterStringOrOptions && typeof filterStringOrOptions === 'string' && filterStringOrOptions.trim() !== '') {
queryString = filterStringOrOptions
filterStringOrOptions = ''
} else {
return {
graph: {
conditions: []
},
parameters: [],
options,
}
}
}
if (typeof filterStringOrOptions === 'string') {
const fullQuery = joinQueryAndFilter(queryString, filterStringOrOptions)
const parsedQuery = parse(fullQuery)
return {
graph: parsedQuery.graph,
parameters: parsedQuery.parameters,
options: specifiedOptions ?? {}
}
} else if (specifiedOptions) {
const parsedQuery = parse(queryString)
return {
graph: parsedQuery.graph,
parameters: parsedQuery.parameters,
options: specifiedOptions ?? {}
}
} else {
const parsedQuery = parse(queryString)
return {
graph: parsedQuery.graph,
parameters: parsedQuery.parameters,
options: filterStringOrOptions ?? specifiedOptions ?? {}
}
}
}
function matchAll(document: Types.Document) {
return true
}
export class QueryMatcher {
private _match: MatchQueryFunction
private _genericMatch: GenericMatchQueryFunction
private _query: Query
private _shapeId: ShapeId
constructor(query: Query) {
if (!isQuery(query)) {
throw new Error('Argument error. QueryMatcher requires Query as input')
}
this._query = query
if (this._query.graph.conditions.length === 0) {
this._shapeId = 'ALL'
this._match = matchAll
return
}
this._shapeId = extractShapeFromQuery(query)
const existingMatchFunction = GlobalQueryCache.get(this._shapeId)
if (existingMatchFunction) {
this._genericMatch = existingMatchFunction.match
} else {
const match = compileQuery(query)
const compiled: CompiledQuery = { match, shapeId: this._shapeId }
GlobalQueryCache.set(this._shapeId, compiled)
this._genericMatch = match
}
this._match = (d: Types.Document) => {
return this._genericMatch(d, this._query.parameters, this._query.options || {})
}
}
get shapeId() {
return this._shapeId
}
get matchFunctionId() {
return this._genericMatch.matchFunctionId
}
get query() {
return this._query
}
match(document: Types.Document) {
return this._match(document)
}
}
type CompiledQuery = {
// id: string
// displayName: string
// parsed: MatchExpression
match: GenericMatchQueryFunction
// fullQuery: string
// originalQuery: string
// originalFilter: string
shapeId: ShapeId
// queryFunctionId: number
}
function joinQueryAndFilter(queryString: string, filterString: string): string {
let fullQuery = null
if (queryString && queryString.trim() !== '') {
fullQuery = queryString
}
if (filterString && filterString.trim() !== '') {
if (fullQuery) {
return `(${fullQuery}) AND (${filterString})`
} else {
return filterString
}
} else {
return fullQuery
}
}
// const { match: matchQuery, parsed: parsedQuery, shapeId: queryShape } = compileQuery(queryString, options)
// const { match: matchFilter, parsed: parsedFilter, shapeId: filterShape } = compileQuery(filterString, options)
// // const displayName = `dynamic://aeppic/queries/${queryFunctionId}/${filterFunctionId}`
// const match = joinMatchFunctions(matchQuery, matchFilter)
// return {
// // displayName,
// match,
// parsed: joinParsedQueries(parsedQuery, parsedFilter),
// originalQuery: queryString,
// originalFilter: filterString,
// shapeId: queryShape + '~' + filterShape,
// }
// }
// function joinMatchFunctions(matchQuery: MatchQueryFunction, matchFilter: MatchQueryFunction): MatchQueryFunction {
// if (!matchFilter) {
// return matchQuery
// }
// const matchFunction: any = (document: Types.Document) => {
// const queryMatches = matchQuery(document)
// if (queryMatches === false || !matchFilter) {
// return queryMatches
// }
// return matchFilter(document)
// }
// matchFunction.matchFunctionId = Symbol('Joined Match Function')
// return matchFunction
// }
function compileQuery(query: Query): GenericMatchQueryFunction {
// const start = process.hrtime.bigint()
const queryFunctionDefinition = buildQueryFunctionDefinition(query)
try {
const innerMatchFunction = new Function('now', ...QUERY_FUNCTION_PARAMETER_NAMES, 'fieldMatchers', 'helpers', queryFunctionDefinition)
const wrappedFn = <GenericMatchQueryFunction> function(document: Types.Document, parameters: QueryParameters, options: MatchOptions) {
const now = getNow()
return innerMatchFunction(now, document, parameters, options, FIELD_MATCHERS, HELPERS)
}
wrappedFn.displayName = `Query`
wrappedFn.matchFunctionId = Symbol(`Match Function for ${wrappedFn.displayName}`)
return wrappedFn
} catch (compileError) {
// tslint:disable-next-line no-console
console.error('Error compiling dynamic code', compileError, queryFunctionDefinition)
throw new Error('Compilation Error')
} finally {
// const end = process.hrtime.bigint()
// const diffInNanoseconds = end - start
// const totalDurationInMs = Number(diffInNanoseconds / BigInt(1000000))
// console.log('compile', diffInNanoseconds, totalDurationInMs)
}
}
function buildQueryFunctionDefinition(query: Query): string {
const MATCH_CONDITIONS = compileMatchExpressions(query.graph, query.parameters, query.options)
const definition = `
/*
* QUERY
*
* Built: ${new Date().toISOString()}
*
****
Graph:
${JSON.stringify(query.graph, null, 2)})
Options:
${JSON.stringify(query.options, null, 2)})
****
*/
const matches = ${MATCH_CONDITIONS || 'false'};
return matches
`
// console.log('QUERY FUNC', query)
// console.log(definition)
return definition
}
function compileMatchExpressions(expression: MatchExpression, parameters: QueryParameters, options: MatchOptions) {
const { conditions } = expression
if (conditions.length === 1) {
return compileMatchConditionExpression(conditions[0], parameters, options)
}
const expressions = []
for (let i = 0; i < conditions.length; i ++) {
const condition = conditions[i]
const expression = compileMatchConditionExpression(condition, parameters, options)
if (expression) {
expressions.push(expression)
}
}
const operator = 'operator' in expression ? expression.operator : 'AND'
if (operator === 'AND') {
return joinExpressions(expressions, ' && ')
} else if (operator === 'OR') {
return joinExpressions(expressions, ' || ')
} else if (operator === 'NOT') {
throw new Error('NOT operator should have been replaced with "-" termPrefix')
} else {
throw new Error('Unknown operator')
}
}
function joinExpressions(expressions: string[], operator) {
if (expressions.length === 1) {
return expressions[0]
} else if (expressions.length > 1) {
return '( ' + expressions.join(operator) + ' )'
} else {
return '/* NO EXPRESSIONS */'
}
}
function compileMatchConditionExpression(expression: Condition, parameters: QueryParameters, options: MatchOptions) {
if (isFieldMatchNode(expression)) {
const compiled = compileFieldMatchExpression(expression, parameters, options)
if (expression.termPrefix !== '-') {
return compiled
} else {
return `!(${compiled})`
}
} else {
return '( ' + compileMatchExpressions(expression, parameters, options) + ' )'
}
}
function compileFieldMatchExpression(fieldMatch: FieldMatchNode, parameters: QueryParameters, options: MatchOptions) {
if (typeof fieldMatch.term === 'undefined' && typeof fieldMatch.term_max === 'undefined' && typeof fieldMatch.term_min === 'undefined') {
console.warn('NOTE: Unsupported condition', fieldMatch)
return
}
if (fieldMatch.field) {
const singleFieldMatchExpression = compileSingleFieldMatchExpression(`${toSafeFieldAccess(fieldMatch.field)}`, fieldMatch, parameters, options)
return singleFieldMatchExpression
} else if (options.searchAllFields === true) {
const allFieldsMatchExpression = compileAllDataFieldsMatchExpression(fieldMatch, parameters, options)
return allFieldsMatchExpression
} else {
const singleFieldMatchExpression = compileSingleFieldMatchExpression('document.data.name', fieldMatch, parameters, options)
return singleFieldMatchExpression
}
}
export function toSafeFieldAccess(fieldPath: string) {
// if (fieldPath === 'id'
// || fieldPath === 'a'
// || fieldPath === 'f.id'
// || fieldPath === 'stamps.id'
// || fieldPath === 'stamps.type.id'
// || fieldPath === 'hidden'
// || fieldPath === 'cloneOf.id'
// || fieldPath === 'readonly'
// || fieldPath.startsWith('stampData')
// || fieldPath.startsWith('modified')
// || fieldPath.startsWith('created')
// ) {
// return fieldPath
// }
const indexOfFirstSubField = fieldPath.indexOf('.')
if (indexOfFirstSubField < 0) {
return `${DOCUMENT_ACCESS}${fieldPath}`
}
const firstField = fieldPath.substr(0, indexOfFirstSubField)
if (firstField === 'data' || firstField === 'stamps') {
const firstSubFieldEnd = fieldPath.indexOf('.', indexOfFirstSubField + 1)
if (firstSubFieldEnd > 0) {
const firstSubField = fieldPath.substring(indexOfFirstSubField + 1, firstSubFieldEnd)
const subAccess = fieldPath.substr(firstSubFieldEnd + 1)
const safe = subAccess.replace(/\./g, '?.')
return `${DOCUMENT_ACCESS}${firstField}.${firstSubField}?.${safe}`
} else {
const subAccess = fieldPath.substr(firstField.length + 1)
const safe = subAccess.replace(/\./g, '?.')
return `${DOCUMENT_ACCESS}${firstField}.${safe}`
}
}
const safe = DOCUMENT_ACCESS + fieldPath.replace(/\./g, '?.')
return safe
}
function compileAllDataFieldsMatchExpression(fieldMatch: FieldMatchNode, parameters: QueryParameters, options: MatchOptions) {
return `
(function(){
for (const dataFieldName of Object.keys(document.data)) {
const field = document.data[dataFieldName]
const matches = (${compileSingleFieldMatchExpression('field', fieldMatch, parameters, options)})
if (matches) {
return true
}
}
return false
})()
`
}
export function compileSingleFieldMatchExpression(fieldPath: string, fieldMatch: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): string {
const fieldAccessor = buildIntermediaryArraySafeAccessorForField(fieldPath)
// return '((1 === 1) && console.log(options))'
if (fields.matchesAnyTruthyValue(fieldMatch, parameters)) {
return `fieldMatchers.truthy(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters)`
} else if (fields.isRangeTerm(fieldMatch, parameters)) {
return `fieldMatchers.range(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters, { nowUtcTz: now.isoTz })`
} else if (fields.comparesToBoolean(fieldMatch, parameters)) {
return `fieldMatchers.boolean(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters)`
} else if (fields.isToBeComparedLiterally(fieldMatch, parameters)) {
return `fieldMatchers.term(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters)`
} else if (fields.isWildcardMatch(fieldMatch, parameters)) {
return `fieldMatchers.wildcard(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters)`
} else {
return `fieldMatchers.term(${fieldAccessor}, ${JSON.stringify(fieldMatch)}, parameters)`
}
}
function fieldMatchesTerm(fieldValue: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
if (fieldValue == null) {
return false
}
const term = parameters[condition.term.id]
const termIsArray = Array.isArray(term)
const singleTermMatcher = SINGLE_TERM_MATCHERS[typeof fieldValue]
if (!singleTermMatcher) {
return false
}
if (!termIsArray) {
return singleTermMatcher(fieldValue, term, options)
} else {
for (const singleTerm of term) {
if (singleTermMatcher(fieldValue, singleTerm, options)) {
return true
}
}
}
return false
}
export function buildIntermediaryArraySafeAccessorForField(fieldPath: string): string {
if (fieldPath === 'field') {
return fieldPath
}
const subFieldBeginning = fieldPath.indexOf('.', DOCUMENT_ACCESS.length)
if (subFieldBeginning >= 0) {
const firstFieldAccessorPath = fieldPath.substring(DOCUMENT_ACCESS.length, subFieldBeginning)
if (firstFieldAccessorPath === 'stamps') {
const innerFieldAccessor = fieldPath.substring(subFieldBeginning + 1)
const intermediaryArrayLookup = `${DOCUMENT_ACCESS}${firstFieldAccessorPath}?.map(fieldValue => fieldValue?.${innerFieldAccessor})`
return intermediaryArrayLookup
} else if (firstFieldAccessorPath === 'data') {
const dataFieldNameStart = DOCUMENT_ACCESS.length + firstFieldAccessorPath.length + 1
const dataFieldNameEnd = fieldPath.indexOf('.', dataFieldNameStart)
const dataFieldName = fieldPath.substring(dataFieldNameStart, dataFieldNameEnd < 0 ? undefined : dataFieldNameEnd)
if (dataFieldNameEnd < 0) {
return fieldPath
}
const dataFieldNameWithoutQuestionMarkEnd = dataFieldName.substr(0, dataFieldName.length - 1)
const remainderFieldAccessor = fieldPath.substring(dataFieldNameEnd)
const intermediaryArrayLookup = `${DOCUMENT_ACCESS}${DATA_FIELD_PREFIX}${dataFieldNameWithoutQuestionMarkEnd}.map(fieldValue => fieldValue?${remainderFieldAccessor})`
return `(Array.isArray(document.data.${dataFieldNameWithoutQuestionMarkEnd}) ? ${intermediaryArrayLookup} : ${fieldPath})`
}
}
return fieldPath
}
const SINGLE_TERM_MATCHERS = {
string: _stringFieldMatchesSingleTerm,
number: _numberFieldMatchesSingleTerm,
object: _objectFieldMatchesSingleTerm,
}
// function _booleanFieldMatchesSingleTerm(field: any, term: string|number|boolean, options: MatchOptions) {
// const termAsBoolean = typeof term === 'boolean' ? term : term === 'true'
// if (termAsBoolean) {
// return termAsBoolean === field
// } else {
// return false
// }
// }
function _numberFieldMatchesSingleTerm(field: any, term: string|number|boolean, options: MatchOptions) {
const termAsNumber = +term
const termIsNumber = !isNaN(termAsNumber)
if (termIsNumber) {
return termAsNumber === field
} else {
return false
}
}
function _stringFieldMatchesSingleTerm(field: any, term: any, options: MatchOptions) {
if (field === term) {
return true
}
if (term == null) {
if (field == null) {
return true
}
return false
}
if (typeof term === 'number') {
return field === term.toString()
}
if (typeof term === 'string') {
if (isIsoDate(term)) {
const charactersToCompare = term.length
if (field.length === charactersToCompare) {
return (field === term)
}
const comparisonBase = field.substr(0, charactersToCompare)
return comparisonBase === term
}
return field.toLowerCase() === term.toLowerCase()
}
return field === term.toString()
}
function _objectFieldMatchesSingleTerm(field: any, term: any, options: MatchOptions) {
if ('id' in field) {
if (term === field.id) {
return true
}
} else if ('text' in field) {
return _stringFieldMatchesSingleTerm(field.text, term, options)
}
}
const IS_NON_LETTER_OR_NEWLINE = /^[\W]+$/
type FieldTermMatcher = (field: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions) => boolean
function supportArrayMatches(matcher: FieldTermMatcher): FieldTermMatcher {
return function match(field: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions) {
if (field == null) {
return false
}
if (!Array.isArray(field)) {
return matcher(field, condition, parameters, options)
}
for (const singleFieldValue of field) {
if (Array.isArray(singleFieldValue)) {
for (const item of singleFieldValue) {
if (matcher(item, condition, parameters, options)) {
return true
}
}
} else {
if (matcher(singleFieldValue, condition, parameters, options)) {
return true
}
}
}
return false
}
}
function fieldMatchesRange(field: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
const min = parameters[condition.term_min.id]
const max = parameters[condition.term_max.id]
if (min === '*' && max === '*') {
return !!field
}
if (typeof field === 'number') {
return numberFieldMatchesRange(field, condition, parameters, options)
} else if (typeof field === 'string') {
if (isIsoDate(field)) {
return dateFieldMatchesRange(field, condition, parameters, options)
} else {
return stringFieldMatchesRange(field, condition, parameters, options)
}
}
return false
}
function dateFieldMatchesRange(field: string, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
const fieldDate = DateTime.fromISO(field)
const min = parameters[condition.term_min.id]
const max = parameters[condition.term_max.id]
if (min !== '*') {
const minDate = parseDateRange(min, { now: options.nowUtcTz })
const minMatch = condition.inclusive !== false ? fieldDate >= minDate : fieldDate > minDate
if (!minMatch) {
return false
}
}
if (max !== '*') {
const maxDate = parseDateRange(max, { now: options.nowUtcTz })
const maxMatch = condition.inclusive !== false ? fieldDate <= maxDate : fieldDate < maxDate
if (!maxMatch) {
return false
}
}
return true
}
function numberFieldMatchesRange(field: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
const min = parameters[condition.term_min.id]
const max = parameters[condition.term_max.id]
if (min !== '*') {
const termMinAsNumber = +min
const termMinIsNumber = !isNaN(termMinAsNumber)
if (!termMinIsNumber) {
return false
}
const minMatch = condition.inclusive !== false ? field >= termMinAsNumber : field > termMinAsNumber
if (!minMatch) {
return false
}
}
if (max !== '*') {
const termMaxAsNumber = +max
const termMaxIsNumber = !isNaN(termMaxAsNumber)
if (!termMaxIsNumber) {
return false
}
const maxMatch = condition.inclusive !== false ? field <= termMaxAsNumber : field < termMaxAsNumber
if (!maxMatch) {
return false
}
}
return true
}
function stringFieldMatchesRange(field: string, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions) {
const min = parameters[condition.term_min.id]
const max = parameters[condition.term_max.id]
if (min !== '*') {
const minMatch = condition.inclusive !== false ? field >= min : field > min
if (!minMatch) {
return false
}
}
if (max !== '*') {
const maxMatch = condition.inclusive !== false ? field <= max : field < max
if (!maxMatch) {
return false
}
}
return true
}
function fieldMatchesTruthy(field: any, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
if (!field) {
return false
}
if (typeof field === 'object') {
if ('id' in field) {
return !!field.id
}
}
return (!!field)
}
function fieldMatchesBoolean(field: string, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
const term = parameters[condition.term.id]
if (typeof field !== 'boolean') {
return false
}
if (typeof term === 'boolean') {
return field === term
}
if (typeof term === 'string') {
const lowerCaseTerm = term.toLowerCase()
if (field && lowerCaseTerm === 'true') {
return true
} else if (!field && lowerCaseTerm === 'false') {
return true
}
}
return false
}
function fieldMatchesWildcard(field: string|object, condition: FieldMatchNode, parameters: QueryParameters, options: MatchOptions): boolean {
const term = parameters[condition.term.id]
// Wildcard match in Refs
if (typeof field === 'object') {
const o: any = field
if ('text' in o && 'id' in o) {
if (o.id && o.text) {
return fieldMatchesWildcard(o.text, condition, parameters, options)
}
}
return false
}
if (typeof field !== 'string' || typeof term !== 'string') {
return false
}
const trimmedTerm = term.trim()
if (!trimmedTerm.endsWith('*')) {
return false
}
const hasLeadingWildcard = (trimmedTerm[0] === '*')
const termOffset = hasLeadingWildcard ? 1 : 0
const termLengthWithoutWildcards = term.length - 1 - termOffset
const termWithoutWildcard = trimmedTerm.substr(termOffset, termLengthWithoutWildcards)
// const isCaseSensitiveMatch = isAllUppercase(termWithoutWildcard) && termWithoutWildcard.length >= 3
const isCaseSensitiveMatch = false
const fieldValueToMatch = isCaseSensitiveMatch ? field : field.toLowerCase()
const termToMatch = isCaseSensitiveMatch ? termWithoutWildcard : termWithoutWildcard.toLowerCase()
let foundAtIndex = -1
for (;;) {
foundAtIndex = fieldValueToMatch.indexOf(termToMatch, foundAtIndex + 1)
if (foundAtIndex < 0) {
return false
} else if (foundAtIndex === 0) {
return true
}
const characterBeforeMatch = fieldValueToMatch[foundAtIndex - 1]
if (hasLeadingWildcard) {
return true
} else if (IS_NON_LETTER_OR_NEWLINE.test(characterBeforeMatch)) {
return true
} else {
continue
}
}
}
// const { queries } = process(queryStringPart, filterStringPart, { omitTest: true })
// const a = []
// const p = []
// const f = []
// const ids = []
// // TODO: Actually we would have to identify sub graphs within each query and potentially join later
// for (const query of queries) {
// for (const condition of query) {
// if (condition.field === 'p') {
// p.push(condition.term)
// } else if (condition.field === 'a') {
// a.push(condition.term)
// } else if (condition.field === 'id') {
// ids.push(condition.term)
// } else if (condition.field === 'f.id') {
// f.push(condition.term)
// }
// }
// }
// return { a, p, ids, f, subQueryCount: queries.length }
// }
export type SubGraphInfo = {
a: string[]
p: string[]
f: string[]
ids: string[]
}
export function isQueryOfPartialDocuments(_query, options) {
if (options) {
if (options.field || options.fields || options.omitField || options.omitFields) {
return true
}
}
return false
}
export function isTimeDependentQuery(query: Query): boolean {
for (const fieldMatcher of enumerateFieldMatcherNodes(query)) {
if (isTimeDependentFieldMatch(fieldMatcher, query.parameters)) {
return true
}
}
return false
}
function isTimeDependentFieldMatch(fieldMatch: FieldMatchNode, parameters: QueryParameters) {
if (fieldMatch.term) {
return false
}
if (fieldMatch.term_min) {
const param = parameters[fieldMatch.term_min.id]
if (isTimeDependentParameter(param)) {
return true
}
}
if (fieldMatch.term_max) {
const param = parameters[fieldMatch.term_max.id]
if (isTimeDependentParameter(param)) {
return true
}
}
return false
}
function isTimeDependentParameter(parameter: QueryParameterValue) {
return parameter.includes('NOW')
}
export function* enumerateFieldMatcherNodes(query: Query): IterableIterator<FieldMatchNode> {
let nodes: [FieldMatchNode|MatchExpression] = [query.graph]
for (;;) {
const currentNode = nodes.shift()
if (!currentNode) {
break
}
if (isFieldMatchNode(currentNode)) {
yield currentNode
} else {
for (const condition of currentNode.conditions) {
nodes.push(condition)
}
}
}
}
export function identifySubGraphsInQuery(queryOrQueryString: string|Query): SubGraphInfo {
const info = {
a: new Set<string>(),
p: new Set<string>(),
f: new Set<string>(),
ids: new Set<string>(),
}
const query = typeof queryOrQueryString === 'string' ? parse(queryOrQueryString) : queryOrQueryString
let nodes: [FieldMatchNode|MatchExpression] = [query.graph]
for (const currentNode of enumerateFieldMatcherNodes(query)) {
if (currentNode.field === 'p') {
registerFieldInfo(query, currentNode, 'p', info.p)
} else if (currentNode.field === 'a') {
registerFieldInfo(query, currentNode, 'a', info.a)
} else if (currentNode.field === 'id') {
registerFieldInfo(query, currentNode, 'id', info.ids)
} else if (currentNode.field === 'f.id') {
registerFieldInfo(query, currentNode, 'f.id', info.f)
}
}
return {
a: Array.from(info.a.values()),
p: Array.from(info.p.values()),
f: Array.from(info.f.values()),
ids: Array.from(info.ids.values()),
}
}
function registerFieldInfo(query: Query, node: FieldMatchNode, fieldName: string, register: Set<string>) {
const value = query.parameters[node.term.id]
if (Array.isArray(value)) {
for (const v of value) {
register.add(v)
}
} else {
register.add(value)
}
}
Query Finding Algorithm
Takes a query graph and allows to test documents against it to find out whether a document matches the query or not.
query-matcher.spec.cjs and finder.ts
js
'use strict'
const test = require('tape')
// const util = require('util')
const { Querying: { QueryMatcher, GlobalQueryCache, Scopes, parseQueryAndFilter, toSafeFieldAccess, identifySubGraphsInQuery, isQuery } } = require('../../dist/aeppic.cjs')
const SIMPLE_QUERY = 'p:2323'
const QUERY = 'data.name:hallo data.age:1'
const ALL_FIELDS_QUERY = '*hallo*'
const FILTER = 'f.id:form p:2323'
const FILTER_ONE_TERM = 'f.id:form'
// const QUERY_SIMPLE_OR = 'data.name:hallo OR data.age:999'
// const QUERY_SIMPLE_OR_NESTED = 'data.name:hallo AND (data.age:2 OR data.age:999)'
// const QUERY_SIMPLE_OR_DEEPLY_NESTED = '(data.name:hallo OR data.name:hallo2) AND (data.age:2 OR data.age:999)'
const QUERY_SIMPLE_SUFFIX_WILDCARD = 'data.name:Hal*'
const QUERY_SIMPLE_FULL_WILDCARD = 'data.name:*hal*'
// const QUERY_WITH_OUTER_BRACKETS_NESTED = '(a:1 f.id:form OR a:2 data.name:hallo)'
const MATCHING_OBJECT = {
p: '2323',
f: {
id: 'form'
},
data: {
name: 'hallo',
age: 1,
}
}
const MATCHING_OBJECT_WITH_ARRAY = {
p: '2323',
f: {
id: 'form'
},
data: {
name: ['not-hallo', 'hallo'],
age: 1,
}
}
const MISMATCHING_OBJECT = {
test: 'some other value',
f: {
id: 'form'
},
data: {
name: 'world'
}
}
const UPPERCASE_TEST = {
test: 'some other value',
data: {
name: 'This is a world that truly needs some ARS and RLD'
}
}
const UPPERCASE_TEST_NO_MATCH = {
test: 'some other value',
data: {
name: 'I say ars is not really a sensible word'
}
}
test('Build a one-term query matcher', (t) => {
const matcher = buildQueryMatcher('data.field:1')
t.ok(matcher, 'can build a function')
t.equal(Object.keys(matcher.query.parameters).length, 1)
t.end()
})
test('Safe Field Access Rewrite', (t) => {
t.equal(toSafeFieldAccess('data.icon.dataUrl'), 'document.data.icon?.dataUrl')
t.equal(toSafeFieldAccess('id'), 'document.id')
t.equal(toSafeFieldAccess('data.f'), 'document.data.f')
t.equal(toSafeFieldAccess('data.f.text'), 'document.data.f?.text')
t.end()
})
test('build a query test function', (t) => {
t.plan(3)
const fn = buildQueryMatchFunction(QUERY, FILTER)
t.ok(fn, 'can build a function')
t.ok(fn(MATCHING_OBJECT), 'matching the object')
t.notOk(fn(MISMATCHING_OBJECT), 'not matching the object')
})
test('build a query test function with a single term component', (t) => {
t.plan(3)
const fn = buildQueryMatchFunction(QUERY, FILTER_ONE_TERM)
t.ok(fn, 'can build a function')
t.ok(fn(MATCHING_OBJECT), 'matching the object')
t.notOk(fn(MISMATCHING_OBJECT), 'not matching the object')
})
test('Reuse a query if the shape of the query is identical', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('data.name:HALLO')
const matcher2 = buildQueryMatcher('data.name:BYE-BYE')
const fn1 = matcher1.shapeId
const fn2 = matcher2.shapeId
t.equal(fn1, fn2, 'Tester should use exact same shape')
t.end()
})
test('Test a document against an empty query', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('')
t.ok(matcher1.match({ data: { name: 'test' } }), 'Empty match function matches all documents')
t.end()
})
test('Different shape is query is identical but filter isnt', (t) => {
// Both queries access the same field
const matcher1 = buildQueryMatcher('data.name:HALLO')
const matcher2 = buildQueryMatcher('data.name:BYE-BYE', 'data.someOtherField:"NOTHING_SPECIAL"')
t.notEqual(matcher1.shapeId, matcher2.shapeId, 'Tester should use different same shape')
t.end()
})
test('reusing a query test function vs re-building it', (t) => {
GlobalQueryCache.reset()
measure(t, 'Always a new matcher', (i) => {
const match = buildQueryMatchFunction(QUERY, `data.age:[1 TO ${1 + i}]`)
match(MATCHING_OBJECT)
})
measure(t, 'Same matcher', () => {
const match = buildQueryMatchFunction(QUERY, 'data.age:[1 TO 1]')
match(MATCHING_OBJECT)
})
measure(t, 'Always a new matcher using query builder', (i) => {
const q = Scopes.global().where('data.age').isInRange(1, 1 + i).toQuery()
const matcher = new QueryMatcher(q)
matcher.match(MATCHING_OBJECT)
})
t.end()
})
test('build a query test function with DIFFERENT shape over and over again', (t) => {
GlobalQueryCache.reset()
measure(t, 'Different Shape builds', (i) => {
buildQueryMatchFunction(QUERY, 'data.something:"x"', `data.field${i}:${i} AND data.otherField:${i} OR data.andAnotherField1:${i} AND data.andAnotherField2:${i} data.andAnotherField3:${i}`)
})
t.end()
})
test('build a query test function with SAME shape over and over again', (t) => {
GlobalQueryCache.reset()
measure(t, 'Same Shape builds', (i) => {
buildQueryMatchFunction(QUERY, 'data.something:"x"', `data.field:${i} AND data.otherField:${i} OR data.andAnotherField1:${i} AND data.andAnotherField2:${i} data.andAnotherField3:${i}`)
})
t.end()
})
test('Use Query Tester against document', (t) => {
const testFunction = buildQueryMatchFunction(QUERY)
measure(t, 'Test matching document', () => {
testFunction(MATCHING_OBJECT)
})
t.end()
})
function measure(t, name, functionToTest) {
const WARMUP_COUNT = 100
const COUNT = 10000
for (let i = 0; i < WARMUP_COUNT; i += 1) {
functionToTest(i + 1234567)
}
const start = process.hrtime.bigint()
for (let i = 0; i < COUNT; i += 1) {
functionToTest(i)
}
const end = process.hrtime.bigint()
const diffInNanoseconds = end - start
const totalDurationInMs = Number(diffInNanoseconds / BigInt(1000000))
const durationInMsPerCall = totalDurationInMs / COUNT
const COMPARISON_TIME_FRAME_IN_MS = 1000
const NUMBER_OF_CALLS_WE_CAN_GET_DONE_IN_DESIRED_TIME =
Math.floor(COMPARISON_TIME_FRAME_IN_MS / durationInMsPerCall)
t.comment(`${name}: ${durationInMsPerCall}ms/call - ${totalDurationInMs}ms`)
t.comment(`Maximum number of calls we can perform in ${COMPARISON_TIME_FRAME_IN_MS}ms is ${NUMBER_OF_CALLS_WE_CAN_GET_DONE_IN_DESIRED_TIME}`)
}
test('simple query with empty filter', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(SIMPLE_QUERY, '')
// debugPrint(tester.queries)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the object')
})
test('simple query with empty filter', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(SIMPLE_QUERY, '')
// debugPrint(tester.queries)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the object')
})
test('query tester handles simple SUFFIX WILDCARD queries', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_SUFFIX_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the mismatching object')
})
test('query tester handles simple SUFFIX WILDCARD queries for arrays too', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_SUFFIX_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.ok(match(MATCHING_OBJECT_WITH_ARRAY), 'matching the object')
})
test('query tester handles simple FULL WILDCARD queries for arrays too', (t) => {
t.plan(2)
const match = buildQueryMatchFunction(QUERY_SIMPLE_FULL_WILDCARD)
t.ok(match(MATCHING_OBJECT), 'matching the object')
t.ok(match(MATCHING_OBJECT_WITH_ARRAY), 'matching the object')
})
test('query tester handles arrays inside field expressions in documents', (t) => {
const match = buildQueryMatchFunction('stamps.id:stamp-a OR data.refs.text:A*')
t.ok(match({ data: { refs: [{ text: 'atest' }] } }))
t.ok(match({ data: { refs: { text: 'abc' } } }))
t.ok(match({ data: { refs: [{ text: 'xyz' }, { text: 'abc' }] } }))
t.ok(match({ stamps: [{ id: 'stamp-a' }], data: { refs: { text: 'abc' } } }))
t.ok(match({ stamps: [{ id: 'stamp-b' }], data: { refs: { text: 'abc' } } }))
t.notOk(match({ stamps: [{ id: 'stamp-b' }], data: { refs: { text: 'SOMETHING' } } }))
t.notOk(match({ stamps: [{ id: 'stamp-c' }], data: { refs: { text: 'NOTHING' } } }))
t.end()
})
test('query tester looking for truthy with simple *', (t) => {
const match = buildQueryMatchFunction('data.a:*')
t.ok(match({ data: { a: 1 } }))
t.ok(match({ data: { a: 'TEST' } }))
t.ok(match({ data: { a: { id: 'ref-a', text: 'test', v: '' } } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.notOk(match({ data: { a: '' } }))
t.end()
})
test('query tester looking for truthy with simple *', (t) => {
const match = buildQueryMatchFunction('data.a:[* TO *]')
t.ok(match({ data: { a: 1 } }))
t.ok(match({ data: { a: true } }))
t.ok(match({ data: { a: 'TEST' } }))
t.ok(match({ data: { a: { id: 'ref-a', text: 'test', v: '' } } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: false } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.notOk(match({ data: { a: '' } }))
t.end()
})
test('query tester looking for boolean true', (t) => {
const match = buildQueryMatchFunction('data.flag:true')
t.ok(match({ data: { flag: true } }))
t.notOk(match({ data: { flag: false } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: 1 } }))
t.notOk(match({ data: { a: '' } }))
t.notOk(match({ data: { a: 'test' } }))
t.notOk(match({ data: { a: { id: 'test', text: '', v: '' } } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.end()
})
test('query tester looking for boolean false', (t) => {
const match = buildQueryMatchFunction('data.flag:false')
t.notOk(match({ data: { flag: true } }))
t.ok(match({ data: { flag: false } }))
t.notOk(match({ data: { a: 0 } }))
t.notOk(match({ data: { a: 1 } }))
t.notOk(match({ data: { a: '' } }))
t.notOk(match({ data: { a: 'test' } }))
t.notOk(match({ data: { a: { id: 'test', text: '', v: '' } } }))
t.notOk(match({ data: { a: { id: '', text: '', v: '' } } }))
t.end()
})
test('query tester can search in all fields with wildcard (including .text in refs)', (t) => {
const query = parseQueryAndFilter('A*', { searchAllFields: true })
const matcher = new QueryMatcher(query)
const match = (d) => matcher.match(d)
t.ok(match({ data: { f: 'a' } }))
t.ok(match({ data: { name: 'a' } }))
t.ok(match({ data: { f: ['b', 'a'] } }))
t.ok(match({ data: { f: ['b', 'abc'] } }))
t.ok(match({ data: { refs: [{ id: 'abc', text: 'atest' }] } }))
t.ok(match({ data: { refs: { id: 'abc', text: 'abc' } } }))
t.notOk(match({ data: { refs: { id: '', text: 'abc' } } }), 'Will NOT find refs without id value set')
t.ok(match({ data: { refs: [{ text: 'xyz' }, { id: 'a', text: 'abc' }] } }))
t.notOk(match({ data: { refs: [{ text: 'xyz' }, { id: 'abc' }] } }), 'Doesn\'t wildcard match in id')
t.ok(match({ data: { f: 'a' } }))
t.end()
})
test('query wildcard does NOT match internal word matches', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:allo*')
t.notOk(match(MATCHING_OBJECT), 'should not have matched the object (as it is hallo not allo)')
t.notOk(match(MISMATCHING_OBJECT), 'not matching the mismatching object')
})
test('query wildcard with matches lowercase/uppercase content', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:ARS*')
t.ok(match(UPPERCASE_TEST), 'matching the UPPERCASE document')
t.ok(match(UPPERCASE_TEST_NO_MATCH), 'also matching the lowercase variant')
})
test('query wildcard with all lowercase matches both lowercase and uppercase content', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('data.name:ars*')
t.ok(match(UPPERCASE_TEST), 'matching the UPPERCASE document')
t.ok(match(UPPERCASE_TEST_NO_MATCH), 'matching the lowercase variant')
})
test('Query with simple NOT', (t) => {
t.plan(1)
const match = buildQueryMatchFunction('data.name:hallo NOT data.age:1')
t.notOk(match(
{
data: {
name: 'hallo',
age: 1,
}
}), 'not matching due to age')
})
test('Query with simple NOT of one term', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('p:parent data.name:hallo NOT data.age:1 AND data.text:frank')
t.ok(match(
{
p: 'parent',
data: {
name: 'hallo',
age: 2,
text: 'frank'
}
}), 'matching since age is different and text matches (NOT only applies to next term)')
t.notOk(match(
{
data: {
name: 'hallo',
age: 1,
text: 'frank'
}
}), 'not matching due to age')
})
test('Query with simple NOT of two terms', (t) => {
t.plan(2)
const match = buildQueryMatchFunction('p:parent data.name:hallo NOT data.age:1 NOT data.text:frank')
t.ok(match(
{
p: 'parent',
data: {
name: 'hallo',
age: 2,
text: 'XYZ'
}
}), 'matching since age is different and text is different')
t.notOk(match(
{
data: {
name: 'hallo',
age: 5,
text: 'frank'
}
}), 'not matching due to text')
})
test('Query with nested NOT', (t) => {
t.plan(1)
t.throws(() => buildQueryMatchFunction('data.name:hallo NOT (data.age:1 data.text:frank)'))
})
test('query tester handles OR queries', (t) => {
t.plan(9)
const match = buildQueryMatchFunction('data.name:hallo OR data.age:999')
t.ok(match({ data: { name: 'hallo', age: 999 } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: 4 } }))
t.ok(match({ data: { name: 'test', age: 999 } }))
t.ok(match({ data: { name: 'test', age: 999 } }))
t.notOk(match({ data: { name: 'test', age: 9999 } }))
t.notOk(match({ data: { name: 'test' } }))
t.ok(match({ data: { name: 'hallo' } }))
})
test('query tester handles nested OR queries', (t) => {
t.plan(6)
const match = buildQueryMatchFunction('data.name:hallo AND (data.age:2 OR data.age:999)')
t.ok(match({ data: { name: 'hallo', age: 999 } }))
t.ok(match({ data: { name: 'hallo', age: '999' } }))
t.ok(match({ data: { name: 'hallo', age: 2 } }))
t.notOk(match({ data: { name: 'hallo', age: 1 } }))
t.notOk(match({ data: { name: 'hallo-x', age: 2 } }))
t.notOk(match({ data: { name: 'hallo-x', age: 999 } }))
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(6)
const match = buildQueryMatchFunction('data.name:hallo OR data.age:999 data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor BOTH conditions after OR (which have an implicit AND)')
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(7)
const match = buildQueryMatchFunction('(data.name:hallo OR data.age:999) data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor the last condition')
t.ok(match({ data: { name: 'hallo', age: 0, more: ['this-matches', 'different'] } }), 'more matches here as the suffix query sees the non-letter character in front as beginning of a token')
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(3)
const match = buildQueryMatchFunction('data.more:"m*"')
t.notOk(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'm*' } }))
t.notOk(match({ data: { name: 'hallo', age: 999, more: 'test-m*' } }))
})
test('query tester detects range queries', (t) => {
const FORM_AND_DATE_QUERY = 'f.id:FORM data.date:[* TO now+1d/d]'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY)
const oldDate = new Date(1800, 1, 1).toISOString()
const futureDate = new Date(new Date().getFullYear() + 1, 1, 1).toISOString()
t.ok(match({ f: { id: 'FORM' }, data: { date: oldDate } }))
t.notOk(match({ f: { id: 'FORM' }, data: { date: futureDate } }))
t.end()
})
test('query tester testing range of numbers', (t) => {
const FORM_AND_DATE_QUERY = 'f.id:FORM data.age:{10 TO 99}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY)
t.notOk(match({ f: { id: 'FORM' }, data: { age: 3 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 10 } }))
t.ok(match({ f: { id: 'FORM' }, data: { age: 11 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 99 } }))
t.notOk(match({ f: { id: 'FORM' }, data: { age: 100 } }))
t.notOk(match({ data: { age: 50, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester testing range of dates', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:[2012-08-01T05:00:00Z TO 2015-08-05T00:00:00Z]'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:01Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:01+01:00' } }))
t.end()
})
test('query tester testing range of dates (exclusive range)', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:{2012-08-01T05:00:00Z TO 2015-08-05T00:00:00Z}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:01Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:00-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-10T00:00:00+00:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:00' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2035-08-05T00:00:00+00:30' } }))
t.notOk(match({ data: { aDate: now, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester testing range of dates with rounding', (t) => {
const now = '2014-07-26T10:59:32+00:00'
const FORM_AND_DATE_QUERY = 'f.id:FORM data.aDate:{2012-08-01T05:00:00Z/d TO 2015-08-05T00:00:00Z}'
const match = buildQueryMatchFunction(FORM_AND_DATE_QUERY, '', now)
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:00+04:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01+04:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+02:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T03:00:00+05:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T00:00:00Z' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T00:00:01Z' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T05:00:00+06:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-01T04:30:01-00:30' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2012-08-10T00:00:00+00:00' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:00' } }))
t.ok(match({ f: { id: 'FORM' }, data: { aDate: '2015-08-05T00:00:00+00:30' } }))
t.notOk(match({ f: { id: 'FORM' }, data: { aDate: '2035-08-05T00:00:00+00:30' } }))
t.notOk(match({ data: { aDate: now, nonExistentField: { id: 'FORM' } } }))
t.end()
})
test('query tester reusing shape', (t) => {
t.ok(compareShapes('p:PARENT_1', 'p:PARENT_2'))
t.ok(compareShapes('p:PARENT_1', 'p:PARENT_1'))
t.notOk(compareShapes('p:PARENT_1', 'a:PARENT_1'))
t.notOk(compareShapes('data.field:test*', 'data.field:test'), 'Wildcard queries should be handled differently')
t.ok(compareShapes('data.field:test*', 'data.field:*test'), 'Wildcard queries have same shape for now')
t.notOk(compareShapes('data.field:[* TO 4]', 'data.field:test'), 'range is different to normal terms')
t.ok(compareShapes('data.field:[* TO 4]', 'data.field:[6 TO 9]'), 'range on same field is comparable')
t.notOk(compareShapes('data.field:{* TO 4}', 'data.field:[6 TO 9]'), 'range on same field is NOT comparable with different inclusion boundaries')
t.ok(compareShapes('data.field:true', 'data.field:false'), 'boolean comparisons are same')
t.notOk(compareShapes('data.field:\'false\'', 'data.field:false'), 'boolean comparisons are different to string')
t.notOk(compareShapes('data.field:*', 'data.field:*dsd'), 'any field value is not same as prefix wildcard')
t.notOk(compareShapes('data.field:*', 'data.field:[* TO *]'), 'any field value is NOT the same as open range as termQuotes are not set by query parser')
t.end()
})
function compareShapes(query1, query2) {
const matcher1 = buildQueryMatcher(query1, '')
const matcher2 = buildQueryMatcher(query2, '')
if (matcher1.shapeId == null) {
throw new Error('matcher1 shape is null/undefined')
}
if (matcher2.shapeId == null) {
throw new Error('matcher2 shape is null/undefined')
}
return matcher1.shapeId === matcher2.shapeId
}
test('query reusing same match function hopefully', (t) => {
const matcher1 = buildQueryMatcher('data.field:A')
const matcher2 = buildQueryMatcher('data.field:B')
t.equal(matcher1.matchFunctionId, matcher2.matchFunctionId)
const matcher3 = buildQueryMatcher('data.field:B*')
t.notEqual(matcher1.matchFunctionId, matcher3.matchFunctionId)
t.end()
})
test('Simple OR query', (t) => {
const match = buildQueryMatchFunction('name:hallo OR age:999')
t.ok(match({ name: 'hallo' }))
t.ok(match({ age: 999 }))
t.ok(match({ name: 'hallo', age: 999 }))
t.ok(match({ name: 'xhallo', age: 999 }))
t.ok(match({ name: 'hallo', age: 9990 }))
t.notOk(match({ name: 'hallox', age: 9990 }))
t.notOk(match({ age: 2 }))
t.notOk(match({ name: 'x' }))
t.notOk(match({ name: 'x', data: { name: 'hallo' } }))
t.end()
})
test('Nested OR query', (t) => {
const match = buildQueryMatchFunction('(name:hallo title: ms) (country:NZ OR age:45 OR language:EN)')
t.ok(match({ name: 'hallo', title: 'ms', country: 'nz' }))
t.ok(match({ name: 'hallo', title: 'ms', age: 45 }))
t.ok(match({ name: 'hallo', title: 'ms', language: 'en' }))
t.ok(match({ name: 'hallo', title: 'ms', country: 'de', age: 45 }))
t.notOk(match({ name: 'hallo', title: 'mx', age: 95 }))
t.notOk(match({ name: 'hallo', title: 'ms', age: 95 }))
t.notOk(match({ name: 'hallox', title: 'ms', age: 95 }))
t.notOk(match({ name: 'hallox', title: 'ms' }))
t.notOk(match({ name: 'hallox', title: 'ms', country: 'DE' }))
t.end()
})
test('query tester handles OR queries with one more term following the OR and no parentheses', (t) => {
t.plan(7)
const match = buildQueryMatchFunction('(data.name:hallo OR data.age:999) data.more:m*')
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.ok(match({ data: { name: 'hallo', age: 9999, more: 'more' } }))
t.ok(match({ data: { name: 'nobody', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'hallo', age: 0, more: 'different' } }))
t.ok(match({ data: { name: 'hallo', age: 999, more: 'more' } }))
t.notOk(match({ data: { name: 'nobody', age: 999, more: 'different' } }), 'Neither name matches nor the last condition')
t.ok(match({ data: { name: 'hallo', age: 0, more: ['this-matches', 'different'] } }), 'more matches here as the suffix query sees the non-letter character in front as beginning of a token')
})
test('query can use fluent api', (t) => {
const q = Scopes.global().where('data.age').isInRange(1, 2).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query match date range', (t) => {
const q = Scopes.global().where('data.age').isInRange(1, 2).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for open ranges', (t) => {
const q = Scopes.global().where('data.age').isInRange(1).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for isOneOf', (t) => {
const q = Scopes.global().where('data.age').isOneOf([1, 2]).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('query can use fluent api for isOneOf for strings', (t) => {
const q = Scopes.global().where('data.name').isOneOf(['something', 'hallo']).toQuery()
const matcher = new QueryMatcher(q)
t.ok(matcher.match(MATCHING_OBJECT))
t.end()
})
test('QueryMatcher requires a query', t => {
t.throws(() => QueryMatcher(null))
t.end()
})
function buildQueryMatcher(queryString, filterString) {
const query = parseQueryAndFilter(queryString, filterString)
return new QueryMatcher(query)
}
function buildQueryMatchFunction(queryString, filterString, now) {
const matcher = buildQueryMatcher(queryString, filterString, { nowUtcTz: now })
// console.log('QUERY', queryString, filterString)
// console.log(matcher.shapeId)
// console.dir(matcher.query)
return (d) => matcher.match(d)
}
// test('query tester prioritize', (t) => {
// t.plan(7)
// const match = buildQueryTestFunction(QUERY_SIMPLE_OR_NESTED, FILTER)
// debugPrint(tester.queries)
// t.equal(tester.queries.length, 2, 'should have detected 2 query sets')
// const q1 = tester.queries[0]
// t.equal(q1[0].field, 'data.name', 'name')
// t.equal(q1[1].field, 'data.age', 'age')
// t.equal(q1[1].term, '2', '2')
// const q2 = tester.queries[1]
// t.equal(q2[0].field, 'data.name', 'name')
// t.equal(q2[1].field, 'data.age', 'age')
// t.equal(q2[1].term, '999', '999')
// })
test('identify `root` parent in query', (t) => {
t.plan(2)
const { a, p } = identifySubGraphsInQuery('p:root')
t.deepEqual(a, [], 'No ancestor query term')
t.deepEqual(p, ['root'], 'A single parent query term')
})
test('identify `apps` and `system` ancestor part in OR query', (t) => {
t.plan(2)
const { a, p } = identifySubGraphsInQuery('a:apps OR a:system')
t.deepEqual(a, ['apps', 'system'], 'apps and system have been identified')
t.deepEqual(p, [], 'no parents have been queried')
})
test('identify `root` and `system` parent and ancestor part in query', (t) => {
t.plan(2)
// const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
const { a, p } = identifySubGraphsInQuery('p:root OR a:system')
t.deepEqual(a, ['system'], 'system was identified in ancestors')
t.deepEqual(p, ['root'], 'root was identified as parent')
})
test('identify ids and forms in query', (t) => {
t.plan(2)
// const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
const { ids, f } = identifySubGraphsInQuery('id:system OR f.id:form p:root OR a:system')
t.deepEqual(f, ['form'], 'form was identified')
t.deepEqual(ids, ['system'], 'system was identified')
})
test('identify `system` as id and `system` as ancestor part in query', (t) => {
t.plan(3)
const { a, p, ids } = identifySubGraphsInQuery('id:system OR a:system')
t.deepEqual(a, ['system'], 'system was identified in ancestors')
t.deepEqual(p, [], 'no parents where matched')
t.deepEqual(ids, ['system'], 'system was identified as direct reference')
})
ts
import * as Types from '@aeppic/types'
import { Condition, ExplicitOperator, FieldMatchNode, isFieldMatchNode, MatchExpression, MatchExpressionMultiMatch, QueryParameters } from './types'
import { Index, IndexMap } from '../indexing/document-index'
import { SingleQueryStats } from 'model/indexing/query-stats.js'
import BTreeModule from 'sorted-btree'
const BTree: typeof BTreeModule = (BTreeModule as any).default ?? BTreeModule
type DocumentMatchIteratorWeight = {
type: Symbol
/**
* The cost is an approximation of the number of documents that might need to be enumerated
* and thus require testing
*/
cost: number
isSortable: boolean
}
export const IterateNoDocuments = Symbol('IterateOverNoDocuments')
export const IterateAllDocuments = Symbol('IterateOverAllDocuments')
export const IterateOverSubIterators = Symbol('IterateOverSubIterators')
export const IterateOverId = Symbol('IterateOverId')
export const IterateOverIndex = Symbol('IterateOverIndex')
type DocumentMatchIteratorAllDocuments = DocumentMatchIteratorWeight
type DocumentMatchIteratorUsingId = DocumentMatchIteratorWeight & {
id: string
}
type DocumentMatchIteratorUsingIndex = DocumentMatchIteratorWeight & {
key: string
index: Index
set: Set<Types.DocumentId> | BTreeModule<Types.DocumentId>
}
type DocumentMatchIteratorOverNoDocuments = DocumentMatchIteratorUsingIndex
type DocumentMatchIteratorJoined= DocumentMatchIteratorWeight & {
operator: 'intersect'|'union'
iterators: (DocumentMatchIteratorUsingIndex|DocumentMatchIteratorJoined|DocumentMatchIteratorUsingId)[]
}
function describeIterator(iterator: any): string {
if (iterator.type === IterateOverId) {
return `IterateOverId(${iterator.id})`
} else if (iterator.type === IterateOverIndex) {
return `IterateOverIndex(${iterator.index.fieldPath} = ${iterator.key})`
} else if (iterator.type === IterateAllDocuments) {
return 'IterateAllDocuments'
} else if (iterator.type === IterateNoDocuments) {
return 'IterateNoDocuments'
} else if (iterator.type === KMergedUnion) {
return `KMergedUnion(${iterator.iterators.length} iterators)`
} else if (iterator.type === KMergedIntersection) {
return `KMergedIntersection(${iterator.iterators.length} iterators)`
} else if (iterator.type === IterateOverSubIterators) {
return `IterateOverSubIterators(${iterator.operator} [ ${iterator.iterators.map(i => describeIterator(i)).join(', ')} ]) `
}
return 'Unknown iterator'
}
// New K-Merge Iterator Symbols
export const KMergedUnion = Symbol('KMergedUnion')
export const KMergedIntersection = Symbol('KMergedIntersection')
// Interfaces for K-Merge Iterators
export interface KMergedUnionIterator extends DocumentMatchIteratorWeight {
type: typeof KMergedUnion
iterators: DocumentMatchIterator[] // Sub-iterators that can provide sorted DocumentIds
}
export interface KMergedIntersectionIterator extends DocumentMatchIteratorWeight {
type: typeof KMergedIntersection
iterators: DocumentMatchIterator[] // Sub-iterators that can provide sorted DocumentIds
}
export type DocumentMatchIterator = DocumentMatchIteratorUsingId
| DocumentMatchIteratorWeight
| DocumentMatchIteratorUsingIndex
| DocumentMatchIteratorJoined
| KMergedUnionIterator
| KMergedIntersectionIterator
/**
* A generic MinHeap implementation.
* The heap stores elements of type T and orders them based on a provided comparison function.
*/
class MinHeap<T> {
private heap: T[] = []
private compareFn: (a: T, b: T) => number
/**
* Creates a new MinHeap.
* @param compareFn A function that compares two elements (a, b) of type T.
* It should return a negative number if a < b, zero if a === b, and a positive number if a > b.
*/
constructor(compareFn: (a: T, b: T) => number) {
this.compareFn = compareFn
}
/**
* Adds an item to the heap.
* @param item The item to add.
*/
public push(item: T): void {
this.heap.push(item)
this.siftUp()
}
/**
* Removes and returns the smallest item from the heap.
* @returns The smallest item, or undefined if the heap is empty.
*/
public pop(): T | undefined {
if (this.isEmpty()) return undefined
const item = this.heap[0]
const last = this.heap.pop()
if (last !== undefined && !this.isEmpty()) {
this.heap[0] = last
this.siftDown()
}
return item
}
/**
* Returns the smallest item from the heap without removing it.
* @returns The smallest item, or undefined if the heap is empty.
*/
public peek(): T | undefined {
return this.heap[0]
}
/**
* Checks if the heap is empty.
* @returns True if the heap is empty, false otherwise.
*/
public isEmpty(): boolean {
return this.heap.length === 0
}
/**
* Returns the number of items in the heap.
* @returns The size of the heap.
*/
public size(): number {
return this.heap.length
}
private siftUp(): void {
let nodeIndex = this.heap.length - 1
while (nodeIndex > 0) {
const parentIndex = Math.floor((nodeIndex - 1) / 2)
if (this.compareFn(this.heap[nodeIndex], this.heap[parentIndex]) < 0) {
[this.heap[nodeIndex], this.heap[parentIndex]] = [this.heap[parentIndex], this.heap[nodeIndex]]
nodeIndex = parentIndex
} else {
break
}
}
}
private siftDown(): void {
let nodeIndex = 0
while (true) {
const leftChildIndex = 2 * nodeIndex + 1
const rightChildIndex = 2 * nodeIndex + 2
let smallestChildIndex = nodeIndex
if (leftChildIndex < this.heap.length && this.compareFn(this.heap[leftChildIndex], this.heap[smallestChildIndex]) < 0) {
smallestChildIndex = leftChildIndex
}
if (rightChildIndex < this.heap.length && this.compareFn(this.heap[rightChildIndex], this.heap[smallestChildIndex]) < 0) {
smallestChildIndex = rightChildIndex
}
if (smallestChildIndex !== nodeIndex) {
[this.heap[nodeIndex], this.heap[smallestChildIndex]] = [this.heap[smallestChildIndex], this.heap[nodeIndex]]
nodeIndex = smallestChildIndex
} else {
break
}
}
}
}
/**
* Optimizes intersection iterators using cost-based analysis at graph creation time.
* This function applies optimization rules to sibling iterators based on their relative costs.
*
* IMPORTANT: This optimization is only safe for intersection queries where post-filtering
* will ensure correctness. It should NOT be used for union queries.
*/
function optimizeSiblingIterators(
iterators: DocumentMatchIterator[],
options?: OptimizationOptions,
stats?: SingleQueryStats,
operatorType: 'intersect' | 'union' = 'intersect'
): DocumentMatchIterator[] {
if (!options?.enableCostBasedShortcut || iterators.length <= 1) {
return iterators;
}
// Never optimize union queries - they need all iterators for correctness
if (operatorType === 'union') {
return iterators;
}
// Sort iterators by cost (smallest first)
const sortedIterators = [...iterators].sort((a, b) => a.cost - b.cost);
const selectivityThreshold = options.selectivityThreshold ?? 100;
const costThresholdMultiplier = options.costThresholdMultiplier ?? 10;
// Apply optimization rules as described:
// 1. If any iterator is empty, replace all with empty
if (sortedIterators.some(it => it.type === IterateNoDocuments || it.cost === 0)) {
stats?.mark('sibling-optimization:empty-iterator-found');
return [{ type: IterateNoDocuments, cost: 0, isSortable: true }];
}
// 2. For intersection: Only apply aggressive optimization if we can guarantee correctness
// Check first iterator against selectivity threshold - but keep more conservative
const firstIterator = sortedIterators[0];
// If the first iterator is already very selective, we can optimize
if (firstIterator.cost < selectivityThreshold) {
stats?.mark('sibling-optimization:selective-first-iterator', {
firstCost: firstIterator.cost,
selectivityThreshold,
totalIterators: sortedIterators.length
});
// Record in the format expected by tests
stats?.mark('graph-optimization:intersection-iterators-removed', {
originalCount: sortedIterators.length,
optimizedCount: 1,
removedCount: sortedIterators.length - 1,
mostSelectiveCost: firstIterator.cost,
selectivityThreshold,
costThresholdMultiplier,
mergeCostEstimate: sortedIterators.reduce((sum, it) => sum + it.cost, 0),
triggers: {
isHighlySelective: true,
isBelowCostThreshold: false,
primaryTrigger: 'highly-selective'
}
});
return [firstIterator];
}
// Less aggressive pairwise comparison
let optimizedIterators = [...sortedIterators];
for (let i = 0; i < sortedIterators.length - 1; i++) {
const currentIterator = sortedIterators[i];
const nextIterator = sortedIterators[i + 1];
// Much more conservative threshold - only remove if the cost difference is extreme
const costRatio = nextIterator.cost / currentIterator.cost; // E.g 1000 / 100 => 10.0
const nextIteratorIsMuchMoreExpensive = costRatio > costThresholdMultiplier; // E.g 10.0 > 10 => true
if (nextIteratorIsMuchMoreExpensive) {
stats?.mark('sibling-optimization:cost-threshold-hit', {
currentCost: currentIterator.cost,
nextCost: nextIterator.cost,
costThresholdMultiplier,
ratio: costRatio,
removedCount: sortedIterators.length - (i + 1)
});
// Record in the format expected by tests
stats?.mark('graph-optimization:intersection-iterators-removed', {
originalCount: sortedIterators.length,
optimizedCount: i + 1,
removedCount: sortedIterators.length - (i + 1),
mostSelectiveCost: sortedIterators[0].cost,
selectivityThreshold,
costThresholdMultiplier,
mergeCostEstimate: sortedIterators.reduce((sum, it) => sum + it.cost, 0),
triggers: {
isHighlySelective: false,
isBelowCostThreshold: true,
primaryTrigger: 'below-cost-threshold'
}
});
// Remove all iterators from position i+1 onwards
optimizedIterators = sortedIterators.slice(0, i + 1);
break;
}
}
return optimizedIterators;
}
/**
* Represents an element stored in the MinHeap during a K-Way Union operation.
* It's a tuple containing:
* - `documentId`: The current DocumentId from the sub-iterator.
* - `iterator`: The sub-iterator (generator) itself.
* - `iteratorIndex`: The original index of the sub-iterator (for debugging or tracking purposes).
*/
type KWayUnionHeapElement = [
documentId: Types.DocumentId,
iterator: Generator<Types.DocumentId, void, unknown>,
iteratorIndex: number
];
/**
* Creates a generator that yields sorted DocumentIds from a given DocumentMatchIterator.
* This function is crucial for providing sorted streams to k-way merge algorithms.
* It handles various iterator types, including those based on B-Trees, Sets, and recursively
* calls k-merge enumeration functions if it encounters KMergedUnion or KMergedIntersection iterators.
*
* @param iterator The DocumentMatchIterator to process.
* @param allDocuments Optional map of all documents, used by IterateAllDocuments to get a sorted list of all IDs.
* @yields {Types.DocumentId} DocumentIds in sorted order.
* @throws Error if an unsupported iterator type is encountered or if an old IterateOverSubIterators is found.
*/
export function* getDocumentIdGenerator(iterator: DocumentMatchIterator, allDocuments?: Map<string, Types.Document>): Generator<Types.DocumentId, void, unknown> {
if (isIteratorUsingIndexOfDocuments(iterator)) {
if (iterator.index.isBTree) {
// For BTree used as a Set, document IDs are stored as keys (with null values)
for (const docId of iterator.set.keys()) {
yield docId
}
} else if (iterator.set instanceof Map) {
// For Map, use values() for document IDs
for (const docId of iterator.set.values()) {
yield docId
}
} else if (iterator.set instanceof Set) {
// For standard Set, sort DocumentIds. This can be costly for large sets.
const sortedIds = [...iterator.set.values()].sort()
for (const docId of sortedIds) {
yield docId
}
} else {
console.warn('Unhandled set type in getDocumentIdGenerator for IterateOverIndex, assuming iterable and sorted:', iterator.set)
for (const docId of iterator.set.values()) {
yield docId
}
}
} else if (isIteratorUsingId(iterator)) {
yield iterator.id
} else if (iterator.type === IterateNoDocuments) {
// Yield nothing
} else if (iterator.type === KMergedUnion) {
yield* enumerateKMergedUnion(iterator as KMergedUnionIterator, allDocuments)
} else if (iterator.type === KMergedIntersection) {
yield* enumerateKMergedIntersection(iterator as KMergedIntersectionIterator, allDocuments)
} else if (iterator.type === IterateAllDocuments && allDocuments) {
const sortedIds = [...allDocuments.keys()].sort()
for (const id of sortedIds) {
yield id
}
} else if (isIteratorUsingJoinsOfIterators(iterator)) {
// Handle old IterateOverSubIterators by recursively processing sub-iterators
if (iterator.operator === 'union') {
// For union: yield all documents from all sub-iterators (with deduplication handled by caller)
for (const subIterator of iterator.iterators) {
yield* getDocumentIdGenerator(subIterator, allDocuments)
}
} else if (iterator.operator === 'intersect') {
// For intersection: this is more complex and expensive without sorting
// Fall back to collecting all IDs and computing intersection manually
const subIteratorIds: Set<Types.DocumentId>[] = []
for (const subIterator of iterator.iterators) {
const ids = new Set<Types.DocumentId>()
for (const docId of getDocumentIdGenerator(subIterator, allDocuments)) {
ids.add(docId)
}
subIteratorIds.push(ids)
}
if (subIteratorIds.length === 0) return
// Find intersection of all sets
let intersection = subIteratorIds[0]
for (let i = 1; i < subIteratorIds.length; i++) {
const newIntersection = new Set<Types.DocumentId>()
for (const docId of intersection) {
if (subIteratorIds[i].has(docId)) {
newIntersection.add(docId)
}
}
intersection = newIntersection
}
// Yield intersection in sorted order
const sortedIds = [...intersection].sort()
for (const docId of sortedIds) {
yield docId
}
} else {
throw new Error(`Unsupported operator for old IterateOverSubIterators: ${iterator.operator}`)
}
} else {
// Consider other iterator types or throw an error for unsupported ones
// For IterateAllDocuments without allDocuments, it's problematic.
throw new Error(`Unsupported iterator type for getDocumentIdGenerator: ${String(iterator.type)}`)
}
}
/**
* Performs a K-Way Merge Union of DocumentIds from multiple sub-iterators.
* It expects that `getDocumentIdGenerator` can provide sorted DocumentIds from each sub-iterator.
* Uses a MinHeap to efficiently find the next smallest unique DocumentId across all sub-iterators.
*
* @param iterator The KMergedUnionIterator containing the sub-iterators to merge.
* @param allDocuments Optional map of all documents, passed down to `getDocumentIdGenerator`.
* @yields {Types.DocumentId} Unique DocumentIds in sorted order from the union of sub-iterators.
*/
export function* enumerateKMergedUnion(iterator: KMergedUnionIterator, allDocuments?: Map<string, Types.Document>, stats?: SingleQueryStats): Generator<Types.DocumentId, void, unknown> {
if (iterator.iterators.length === 0) return
if (iterator.iterators.length === 1) {
yield* getDocumentIdGenerator(iterator.iterators[0], allDocuments)
return
}
const heap = new MinHeap<KWayUnionHeapElement>((a, b) => {
if (a[0] < b[0]) return -1;
if (a[0] > b[0]) return 1;
return 0;
});
// Initialize the heap with the first element from each sub-iterator
for (let i = 0; i < iterator.iterators.length; i++) {
const subIteratorGenerator = getDocumentIdGenerator(iterator.iterators[i], allDocuments);
const firstElement = subIteratorGenerator.next();
if (!firstElement.done) {
heap.push([firstElement.value as string, subIteratorGenerator, i]);
}
}
let lastYieldedDocumentId: Types.DocumentId | null = null;
let heapOperations = 0;
let documentsYielded = 0;
while (!heap.isEmpty()) {
heapOperations++;
const heapTopElement = heap.pop()!; // [documentId, subIteratorGenerator, originalIteratorIndex]
const currentDocumentId = heapTopElement[0];
const currentSubGenerator = heapTopElement[1];
const originalIteratorIndex = heapTopElement[2];
if (currentDocumentId !== lastYieldedDocumentId) {
documentsYielded++;
yield currentDocumentId;
lastYieldedDocumentId = currentDocumentId;
}
// Get the next element from the sub-iterator that provided the top element
const nextElement = currentSubGenerator.next();
if (!nextElement.done) {
heapOperations++;
heap.push([nextElement.value as string, currentSubGenerator, originalIteratorIndex]);
}
}
stats?.mark('k-merge-union-stats', {
heapOperations,
documentsYielded,
opsPerDocument: documentsYielded > 0 ? heapOperations / documentsYielded : 0,
iteratorCount: iterator.iterators.length
});
}
/**
* Performs a K-Way Merge Intersection of DocumentIds from multiple sub-iterators.
* It expects that `getDocumentIdGenerator` can provide sorted DocumentIds from each sub-iterator.
* Uses a multi-pointer strategy: advances iterators whose current DocumentId is less than the
* maximum current DocumentId across all iterators until all point to the same ID (which is then yielded),
* or an iterator is exhausted.
*
* @param iterator The KMergedIntersectionIterator containing the sub-iterators to intersect.
* @param allDocuments Optional map of all documents, passed down to `getDocumentIdGenerator`.
* @yields {Types.DocumentId} DocumentIds in sorted order that are common to all sub-iterators.
*/
export function* enumerateKMergedIntersection(iterator: KMergedIntersectionIterator, allDocuments?: Map<string, Types.Document>, stats?: SingleQueryStats): Generator<Types.DocumentId, void, unknown> {
if (iterator.iterators.length === 0) return
// Optimization: if any sub-iterator is IterateNoDocuments, intersection is empty
if (iterator.iterators.some(it => it.type === IterateNoDocuments)) return
if (iterator.iterators.length === 1) {
yield* getDocumentIdGenerator(iterator.iterators[0], allDocuments)
return
}
const subIteratorGenerators: Generator<Types.DocumentId, void, unknown>[] = [];
const currentDocumentIdsFromIterators: (Types.DocumentId | null)[] = []; // Stores the current DocumentId from each sub-iterator
// Initialize generators and populate currentDocumentIdsFromIterators with the first ID from each
for (const subIterator of iterator.iterators) {
const docIdGenerator = getDocumentIdGenerator(subIterator, allDocuments);
subIteratorGenerators.push(docIdGenerator);
const firstElement = docIdGenerator.next();
if (firstElement.done) {
// If any sub-iterator is empty, the intersection is empty.
return;
}
currentDocumentIdsFromIterators.push(firstElement.value as string);
}
let iteratorAdvances = 0;
let comparisonOperations = 0;
let documentsYielded = 0;
mainLoop: while (true) {
let allDocumentIdsAreIdentical = true;
let maximumDocumentId = currentDocumentIdsFromIterators[0]!;
// Check if all current document IDs are the same and find the maximum ID
for (let i = 0; i < currentDocumentIdsFromIterators.length; i++) {
const currentId = currentDocumentIdsFromIterators[i];
if (currentId === null) return; // Should ideally not happen if logic is correct
comparisonOperations++;
if (i > 0 && currentId !== currentDocumentIdsFromIterators[0]) {
allDocumentIdsAreIdentical = false;
}
if (currentId > maximumDocumentId) {
maximumDocumentId = currentId;
}
}
if (allDocumentIdsAreIdentical) {
// All iterators point to the same DocumentId, so yield it.
documentsYielded++;
yield currentDocumentIdsFromIterators[0]!;
// Advance all iterators to their next DocumentId.
for (let i = 0; i < subIteratorGenerators.length; i++) {
iteratorAdvances++;
const nextElement = subIteratorGenerators[i].next();
if (nextElement.done) {
// If any iterator is exhausted, the intersection is complete.
stats?.mark('k-merge-intersection-stats', {
iteratorAdvances,
comparisonOperations,
documentsYielded,
advancesPerDocument: documentsYielded > 0 ? iteratorAdvances / documentsYielded : 0,
comparisonsPerDocument: documentsYielded > 0 ? comparisonOperations / documentsYielded : 0,
iteratorCount: iterator.iterators.length
});
return;
}
currentDocumentIdsFromIterators[i] = nextElement.value as string;
}
} else {
// Not all DocumentIds are the same. Advance iterators whose current DocumentId is less than maximumDocumentId.
for (let i = 0; i < subIteratorGenerators.length; i++) {
while (currentDocumentIdsFromIterators[i]! < maximumDocumentId) {
iteratorAdvances++;
const nextElement = subIteratorGenerators[i].next();
if (nextElement.done) {
// If any iterator is exhausted while trying to catch up, the intersection is complete.
stats?.mark('k-merge-intersection-stats', {
iteratorAdvances,
comparisonOperations,
documentsYielded,
advancesPerDocument: documentsYielded > 0 ? iteratorAdvances / documentsYielded : 0,
comparisonsPerDocument: documentsYielded > 0 ? comparisonOperations / documentsYielded : 0,
iteratorCount: iterator.iterators.length
});
return;
}
currentDocumentIdsFromIterators[i] = nextElement.value as string;
}
// If an iterator, after advancing, has a DocumentId greater than the current maximumDocumentId,
// it means the previous maximumDocumentId was not a common value.
// The loop will continue, and a new maximumDocumentId will be determined in the next iteration.
}
}
}
}
export function isEmptyIterator(iterator: DocumentMatchIteratorWeight): boolean {
return iterator.cost === 0 || iterator.type === IterateNoDocuments
}
export function isIteratorOverAllDocuments(iterator: DocumentMatchIteratorWeight): boolean {
return iterator.type === IterateAllDocuments || iterator.cost === Number.MAX_SAFE_INTEGER
}
export function isIteratorUsingIndexOfDocuments(iterator: DocumentMatchIteratorWeight): iterator is DocumentMatchIteratorUsingIndex {
return (iterator.type === IterateOverIndex)
}
export function *enumerateAllDocumentsInIndexIterator(iterator: DocumentMatchIteratorUsingIndex) {
if (iterator.type === IterateOverIndex) {
for (const documentId of iterator.set.values()) {
yield documentId
}
} else if (isIteratorUsingJoinsOfIterators(iterator)) {
for (const subIterator of iterator.iterators) {
yield *enumerateAllDocumentsInIndexIterator(subIterator as DocumentMatchIteratorUsingIndex)
}
} else {
throw new Error('Not supported')
}
}
export function iteratorContains(iterator: DocumentMatchIteratorUsingIndex, documentId: Types.DocumentId): boolean {
return iterator.set.has(documentId)
}
export function isIteratorUsingJoinsOfIterators(iterator: DocumentMatchIteratorWeight): iterator is DocumentMatchIteratorJoined {
return iterator.type === IterateOverSubIterators
}
export function isIteratorUsingId(iterator: DocumentMatchIteratorWeight): iterator is DocumentMatchIteratorUsingId {
return iterator.type === IterateOverId && 'id' in iterator
}
export function isIteratorOverNoDocuments(iterator: DocumentMatchIteratorWeight): iterator is DocumentMatchIteratorOverNoDocuments {
return iterator.type === IterateNoDocuments || iterator.cost === 0
}
export interface OptimizationOptions {
/**
* Enable cost-based optimization for query intersection processing.
*
* ## What it does:
* When enabled, the query engine can remove high-cost iterators from intersection (AND)
* queries when a highly selective iterator is present. This reduces the computational
* cost of k-way merge operations by eliminating expensive document enumeration.
*
* ## Why it's safe:
* Iterator removal is semantically safe because:
* 1. **Post-filtering ensures correctness**: The removed conditions are still enforced
* by match.ts during the final document filtering phase
* 2. **Superset enumeration**: The remaining iterator(s) return a superset of documents
* that potentially match the query, then post-filtering removes false positives
* 3. **Performance vs correctness trade-off**: We exchange some enumeration overhead
* for significantly reduced k-merge complexity
*
* ## Optimization applies to:
* - ✅ Intersection queries (AND): Safe to remove expensive iterators
* - ❌ Union queries (OR): Never optimized - would change query semantics
*
* @default false
*/
enableCostBasedShortcut?: boolean
/**
* Fixed threshold for identifying highly selective iterators.
*
* ## What it does:
* If an iterator's cost (estimated number of documents) is below this threshold,
* it's considered "highly selective" and triggers optimization. Other iterators
* in the intersection may be removed to reduce k-merge overhead.
*
* ## When to use:
* - Set higher (e.g., 5000) for aggressive optimization in large datasets
* - Set lower (e.g., 100) for conservative optimization
* - Use when you have predictable selective queries (e.g., ID lookups, status filters)
*
* ## Safety:
* This trigger is conservative - only activates when there's a genuinely selective
* iterator that can efficiently enumerate a small candidate set. Post-filtering
* via match.ts ensures all original conditions are still enforced.
*
* @default 100
*/
selectivityThreshold?: number
/**
* Cost ratio threshold for pairwise iterator optimization decisions.
*
* ## What it does:
* When comparing consecutive iterators (sorted by cost), removes an iterator
* and all subsequent ones if the cost ratio exceeds this threshold.
*
* ## How it works:
* - Higher multiplier (e.g., 100) = more aggressive optimization
* - Lower multiplier (e.g., 2) = very conservative optimization
* - Formula: `nextIteratorCost / currentIteratorCost > costThresholdMultiplier`
*
* ## Use cases:
* - High multiplier (100): Keep iterators even if they're much more expensive
* - Low multiplier (2): Remove iterators that are just 2x more expensive
* - Medium multiplier (10): Balanced approach, removes very expensive iterators
*
* ## Example:
* Iterator costs [100, 500, 5000] with multiplier 10:
* - First comparison: 500/100 = 5 ≤ 10 → keep both iterators
* - Second comparison: 5000/500 = 10 ≤ 10 → keep all iterators
* - If multiplier was 5: 5000/500 = 10 > 5 → remove the 5000-cost iterator
*
* ## Safety:
* This optimization is safe for intersection queries because post-filtering
* ensures correctness. Higher multipliers are safer but may reduce optimization.
*
* @default 10 (remove iterators 10x more expensive than the previous one)
*/
costThresholdMultiplier?: number
}
// Legacy type alias for backward compatibility
export type EnumerationOptions = OptimizationOptions
export function* enumerateGraphIterator(iterator: DocumentMatchIterator, allDocuments: Map<string, Types.Document>, stats?: SingleQueryStats): IterableIterator<Types.Document> {
const documentsAlreadyReturned = new Set<string>()
const info = {
enumerated: 0,
duplicates: 0,
}
stats?.phase('enumerate-graph', { iterator: describeIterator(iterator), info })
const ops = stats?.newOpStats('enumerate-graph:yield')
for (const document of _enumerateGraphIterator(iterator, allDocuments, stats)) {
info.enumerated++
const id = document.id
ops?.beginOp()
if (!documentsAlreadyReturned.has(id)) {
documentsAlreadyReturned.add(id)
yield document
} else {
info.duplicates++
}
ops?.endOp()
}
}
function* _enumerateGraphIterator(
iterator: DocumentMatchIterator,
allDocuments: Map<string, Types.Document>,
stats?: SingleQueryStats
): IterableIterator<Types.Document> {
if (isIteratorOverNoDocuments(iterator)) {
return;
} else if (isIteratorUsingId(iterator)) {
const document = allDocuments.get(iterator.id);
if (document) yield document;
} else if (isIteratorUsingIndexOfDocuments(iterator)) {
for (const documentId of iterator.set.keys()) {
const document = allDocuments.get(documentId);
if (document) yield document;
}
} else if (iterator.type === KMergedUnion) {
// Use the efficient k-merge union implementation
for (const documentId of enumerateKMergedUnion(iterator as KMergedUnionIterator, allDocuments, stats)) {
const document = allDocuments.get(documentId);
if (document) {
stats?.increment('enumerated:union:k-merge');
yield document;
}
}
} else if (iterator.type === KMergedIntersection) {
// Use the efficient k-merge intersection implementation
for (const documentId of enumerateKMergedIntersection(iterator as KMergedIntersectionIterator, allDocuments, stats)) {
const document = allDocuments.get(documentId);
if (document) {
stats?.increment('enumerated:intersection:k-merge');
yield document;
}
}
} else if (isIteratorUsingJoinsOfIterators(iterator)) {
if (iterator.iterators.length === 1) {
yield* _enumerateGraphIterator(iterator.iterators[0], allDocuments, stats);
} else if (iterator.operator === 'intersect' && iterator.isSortable) {
yield* _enumerateSortedIntersection(iterator.iterators, allDocuments, stats);
} else if (iterator.operator === 'union' && iterator.isSortable) {
yield* _enumerateSortedUnion(iterator.iterators, allDocuments, stats);
} else if (iterator.operator === 'intersect') {
yield* enumerateIntersectionOfIterators(iterator.iterators, allDocuments, stats);
} else if (iterator.operator === 'union') {
yield* enumerateUnionOfIterators(iterator.iterators, allDocuments, stats);
}
} else if (isIteratorOverAllDocuments(iterator)) {
yield* allDocuments.values();
} else {
throw new Error('Unsupported iterator');
}
}
export function buildIteratorForGraph(
node: MatchExpression | Condition,
parameters: QueryParameters,
indexes: IndexMap,
stats?: SingleQueryStats,
options?: OptimizationOptions,
isTopLevelCall = true
): DocumentMatchIterator {
let iterator: DocumentMatchIterator;
if (isFieldMatchNode(node)) {
iterator = buildIteratorForFieldMatch(node, parameters, indexes, stats);
} else if ('conditions' in node && node.conditions.length === 0) {
stats?.mark('empty-conditions');
iterator = { cost: 0, type: IterateNoDocuments, isSortable: true };
} else if ('conditions' in node && node.conditions.length === 1) {
iterator = buildIteratorForGraph(node.conditions[0], parameters, indexes, stats, options, false);
} else if ('operator' in node) {
iterator = buildIteratorForJoinedCondition(node as MatchExpressionMultiMatch, parameters, indexes, stats, options);
} else {
throw new Error(`Not supported ${JSON.stringify(node)}`);
}
if (isTopLevelCall) {
stats?.phase('final-query-structure', { plan: describeIterator(iterator) });
}
return iterator;
}
function buildIteratorForFieldMatch(condition: FieldMatchNode, parameters: QueryParameters, indexes: IndexMap, stats?: SingleQueryStats): DocumentMatchIterator {
// Handle NOT queries (termPrefix: "-")
if (condition.termPrefix === '-') {
return buildIteratorForNotFieldMatch(condition, parameters, indexes, stats)
}
if (condition.field === 'id') {
const key = parameters[condition.term.id]
return {
type: IterateOverId,
cost: 1,
id: key,
isSortable: true,
}
}
const matchingIndex = indexes[condition.field]
if (matchingIndex) {
if ('term_min' in condition || 'term_max' in condition) {
let keyMin = parameters[condition.term_min?.id]
let keyMax = parameters[condition.term_max?.id]
if (Array.isArray(keyMin) || Array.isArray(keyMax)) {
throw new Error("Not implemented")
}
stats?.mark('range-iterator', { keyMin, keyMax })
return buildIteratorForFieldRange(keyMin, keyMax, matchingIndex, stats)
} else if ('term' in condition) {
const key = parameters[condition.term.id]
if (Array.isArray(key)) {
let cost = 0
const subIterators = []
stats?.mark('multi-match')
for (const value of key) {
const subIterator = buildIteratorForFieldValue(value, matchingIndex, stats)
cost += subIterator.cost
subIterators.push(subIterator)
}
// For multi-value fields, this is a union operation - do not apply intersection optimizations
const optimizedSubIterators = subIterators; // Keep all iterators for union
const optimizedCost = cost;
// Use K-merge union if all iterators are sortable
if (optimizedSubIterators.every(it => it.isSortable)) {
return {
type: KMergedUnion,
cost: optimizedCost,
iterators: optimizedSubIterators,
isSortable: true
}
} else {
return {
type: IterateOverSubIterators,
operator: 'union',
cost: optimizedCost,
iterators: optimizedSubIterators as (DocumentMatchIteratorUsingId | DocumentMatchIteratorUsingIndex | DocumentMatchIteratorJoined)[],
isSortable: false
}
}
} else {
return buildIteratorForFieldValue(key, matchingIndex, stats)
}
}
throw new Error('Not supported')
} else {
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
}
export function buildIteratorForFieldRange(fieldMin: any, fieldMax: any, index: Index, stats?: SingleQueryStats): DocumentMatchIterator {
if (fieldMin === '*' && fieldMax === '*') {
stats?.mark('all-documents-iterator')
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
if (typeof fieldMin === 'string') {
if (fieldMin === '*') {
fieldMin = '\x00'; // Represents the smallest possible string
} else if (fieldMin.endsWith('*')) {
fieldMin = fieldMin.slice(0, -1)
}
}
if (typeof fieldMax === 'string') {
if (fieldMax === '*') {
fieldMax = '\uffff'; // Represents a very large string (for comparison)
} else if (fieldMax.endsWith('*')) {
fieldMax = fieldMax.slice(0, -1)
}
}
const entriesWithKeys = index.getEntriesForRange(fieldMin, fieldMax);
if (entriesWithKeys && entriesWithKeys.length > 0) {
const subIterators: DocumentMatchIteratorUsingIndex[] = [];
for (const [key, setOfDocuments] of entriesWithKeys) {
if (setOfDocuments.size > 0) {
const numberOfDocuments = setOfDocuments.size;
subIterators.push({
type: IterateOverIndex,
index,
key: key as string,
cost: numberOfDocuments,
set: setOfDocuments,
isSortable: index.isBTree
});
}
}
const subIteratorsMatched = subIterators.length
if (subIterators.length === 0) {
stats?.mark('range-iterator:none', { fieldMin, fieldMax, cost: 0 })
return { type: IterateNoDocuments, index, key: fieldMin ?? fieldMax, cost: 0, isSortable: true };
}
if (subIterators.length === 1) {
stats?.mark('range-iterator:single', { fieldMin, fieldMax, cost: subIterators[0].cost})
return subIterators[0];
}
const totalCost = subIterators.reduce((acc, iterator) => acc + iterator.cost, 0);
stats?.mark('range-iterator:union', { fieldMin, fieldMax, subIteratorsMatched, cost: totalCost })
// For range queries, this is a union operation - do not apply intersection optimizations
const optimizedSubIterators = subIterators; // Keep all iterators for union
const optimizedCost = totalCost;
// Use K-merge union if all iterators are sortable
if (optimizedSubIterators.every(it => it.isSortable)) {
return {
type: KMergedUnion,
cost: optimizedCost,
iterators: optimizedSubIterators,
isSortable: true
};
} else {
return {
type: IterateOverSubIterators,
operator: 'union',
cost: optimizedCost,
iterators: optimizedSubIterators as (DocumentMatchIteratorUsingId | DocumentMatchIteratorUsingIndex | DocumentMatchIteratorJoined)[],
isSortable: false
};
}
} else {
return { type: IterateNoDocuments, index, key: fieldMin ?? fieldMax, cost: 0, isSortable: true };
}
}
export function buildIteratorForFieldValue(fieldValue: any, index: Index, stats?: SingleQueryStats): DocumentMatchIterator {
if (typeof fieldValue === 'string' && fieldValue.startsWith('*')) {
stats?.mark('all-documents-iterator')
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
if (typeof fieldValue === 'string' && fieldValue.endsWith('*')) {
fieldValue = fieldValue.slice(0, -1)
}
const setOfDocuments = index.getDocumentIdsForValue(fieldValue)
if (setOfDocuments) {
const numberOfDocuments = setOfDocuments.size
const it = { type: IterateOverIndex, index, key: fieldValue, cost: numberOfDocuments, set: setOfDocuments, isSortable: index.isBTree }
stats?.mark('field-value-iterator', { cost: it.cost, iterator: describeIterator(it) })
return it
} else {
stats?.mark('field-value-iterator:none')
return { type: IterateNoDocuments, index, key: fieldValue, cost: 0, isSortable: true }
}
}
function buildIteratorForNotFieldMatch(condition: FieldMatchNode, parameters: QueryParameters, indexes: IndexMap, stats?: SingleQueryStats): DocumentMatchIterator {
// For id field, NOT queries can't be optimized easily, fall back to all documents
if (condition.field === 'id') {
stats?.mark('not-query:id-field-all-documents')
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
const matchingIndex = indexes[condition.field]
if (!matchingIndex) {
// No index available, must iterate all documents
stats?.mark('not-query:no-index-all-documents')
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
if ('term_min' in condition || 'term_max' in condition) {
// NOT range queries are complex, fall back to all documents for now
stats?.mark('not-query:range-all-documents')
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false }
}
if ('term' in condition) {
const excludeKey = parameters[condition.term.id]
if (Array.isArray(excludeKey)) {
// NOT with multiple values - enumerate all other values
return buildIteratorForNotMultipleValues(excludeKey, matchingIndex, stats)
} else {
// NOT with single value - enumerate all other values
return buildIteratorForNotSingleValue(excludeKey, matchingIndex, stats)
}
}
throw new Error('NOT query not supported for this field type')
}
function buildIteratorForNotSingleValue(excludeValue: any, index: Index, stats?: SingleQueryStats): DocumentMatchIterator {
stats?.mark('not-query:single-value-complement')
// Get all values from the index except the excluded one
const allValues = Array.from((index as any)._valuesToDocumentIds.keys())
stats?.mark('not-query:single-value-complement', { allValuesCount: allValues.length, excludeValue })
const complementValues = allValues.filter(value => value !== excludeValue)
if (complementValues.length === 0) {
// All documents have the excluded value, so NOT query matches nothing
return { type: IterateNoDocuments, index, key: excludeValue, cost: 0, isSortable: true }
}
// Create iterators for all complement values
const subIterators = complementValues.map(value => buildIteratorForFieldValue(value, index, stats))
// For NOT queries, this is a union operation - do not apply intersection optimizations
const optimizedSubIterators = subIterators; // Keep all iterators for union
const optimizedCost = optimizedSubIterators.reduce((sum, it) => sum + it.cost, 0);
// Use K-merge union if all iterators are sortable
if (optimizedSubIterators.every(it => it.isSortable)) {
return {
type: KMergedUnion,
cost: optimizedCost,
iterators: optimizedSubIterators,
isSortable: true
}
} else {
return {
type: IterateOverSubIterators,
operator: 'union',
cost: optimizedCost,
iterators: optimizedSubIterators as (DocumentMatchIteratorUsingId | DocumentMatchIteratorUsingIndex | DocumentMatchIteratorJoined)[],
isSortable: false
}
}
}
function buildIteratorForNotMultipleValues(excludeValues: any[], index: Index, stats?: SingleQueryStats): DocumentMatchIterator {
stats?.mark('not-query:multiple-values-complement')
// Get all values from the index except the excluded ones
const excludeSet = new Set(excludeValues)
const allValues = Array.from((index as any)._valuesToDocumentIds.keys())
const complementValues = allValues.filter(value => !excludeSet.has(value))
if (complementValues.length === 0) {
// All documents have one of the excluded values, so NOT query matches nothing
return { type: IterateNoDocuments, index, key: excludeValues[0], cost: 0, isSortable: true }
}
// Create iterators for all complement values
const subIterators = complementValues.map(value => buildIteratorForFieldValue(value, index, stats))
// For NOT queries, this is a union operation - do not apply intersection optimizations
const optimizedSubIterators = subIterators; // Keep all iterators for union
const optimizedCost = optimizedSubIterators.reduce((sum, it) => sum + it.cost, 0);
// Use K-merge union if all iterators are sortable
if (optimizedSubIterators.every(it => it.isSortable)) {
return {
type: KMergedUnion,
cost: optimizedCost,
iterators: optimizedSubIterators,
isSortable: true
}
} else {
return {
type: IterateOverSubIterators,
operator: 'union',
cost: optimizedCost,
iterators: optimizedSubIterators as (DocumentMatchIteratorUsingId | DocumentMatchIteratorUsingIndex | DocumentMatchIteratorJoined)[],
isSortable: false
}
}
}
export function buildIteratorForJoinedCondition(node: MatchExpressionMultiMatch, parameters: QueryParameters, indexes: IndexMap, stats?: SingleQueryStats, options?: OptimizationOptions): DocumentMatchIterator {
if (node.operator === 'AND') {
return buildIntersectionIterator(node.conditions, parameters, indexes, stats, options)
} else if (node.operator === 'OR') {
return buildUnionIterator(node.conditions, parameters, indexes, stats)
} else {
throw new Error('Unsupported iterator')
}
}
export function buildIntersectionIterator(
conditions: Condition[],
parameters: QueryParameters,
indexes: IndexMap,
stats?: SingleQueryStats,
options?: OptimizationOptions
): DocumentMatchIteratorJoined | DocumentMatchIteratorOverNoDocuments | DocumentMatchIteratorAllDocuments {
const iteratorsToJoin = conditions.map(c => buildIteratorForGraph(c, parameters, indexes, stats, options, false));
// Flatten nested intersections and collect all iterators
const flattenedIterators: (DocumentMatchIteratorUsingId | DocumentMatchIteratorJoined | DocumentMatchIteratorUsingIndex)[] = [];
let minimumCost = Number.MAX_SAFE_INTEGER;
for (const iterator of iteratorsToJoin) {
if (isIteratorOverNoDocuments(iterator)) {
stats?.mark('intersection-iterator:none');
return iterator;
}
if (isIteratorOverAllDocuments(iterator)) {
// Skip "all documents" iterators in intersection - they don't constrain the result
continue;
}
// Flatten nested intersections
if (isIteratorUsingJoinsOfIterators(iterator) && iterator.operator === 'intersect') {
flattenedIterators.push(...iterator.iterators);
if (iterator.cost < minimumCost) {
minimumCost = iterator.cost;
}
} else if (
isIteratorUsingId(iterator) ||
isIteratorUsingIndexOfDocuments(iterator) ||
isIteratorUsingJoinsOfIterators(iterator) ||
iterator.type === KMergedUnion ||
iterator.type === KMergedIntersection
) {
flattenedIterators.push(iterator as any);
if (iterator.cost < minimumCost) {
minimumCost = iterator.cost;
}
} else {
console.warn('Unsupported iterator type for intersection:', iterator);
stats?.mark('intersection-iterator:unsupported', { iterator: describeIterator(iterator) });
continue;
}
}
// Apply optimization at this level for intersection operations
let optimizedIterators = flattenedIterators;
if (options?.enableCostBasedShortcut && flattenedIterators.length > 1) {
optimizedIterators = optimizeSiblingIterators(flattenedIterators, options, stats, 'intersect') as (DocumentMatchIteratorUsingId | DocumentMatchIteratorJoined | DocumentMatchIteratorUsingIndex)[];
// Update minimum cost based on optimized iterators
if (optimizedIterators.length !== flattenedIterators.length) {
minimumCost = Math.min(...optimizedIterators.map(it => it.cost));
}
}
if (optimizedIterators.length === 0) {
stats?.mark('intersection-iterator:all-documents');
return { type: IterateAllDocuments, cost: Number.MAX_SAFE_INTEGER, isSortable: false };
}
if (optimizedIterators.length === 1) {
const singleIterator = optimizedIterators[0];
stats?.mark('intersection-iterator:single', { iterator: describeIterator(singleIterator), cost: singleIterator.cost });
return singleIterator;
}
// Sort iterators by cost to optimize enumeration (smallest first)
optimizedIterators.sort((a, b) => a.cost - b.cost);
// Check if we can use k-merge intersection (all iterators are sortable)
const allSortable = optimizedIterators.every((it: any) => it.isSortable);
if (allSortable && optimizedIterators.length > 2) {
stats?.mark('intersection-iterator:k-merge', {
cost: minimumCost,
iteratorCount: optimizedIterators.length,
iterators: optimizedIterators.map((i: any) => ({
type: i.type.description,
cost: i.cost
}))
});
return {
type: KMergedIntersection,
cost: minimumCost,
iterators: optimizedIterators,
isSortable: true
};
}
// Fall back to traditional nested intersection
stats?.mark('intersection-iterator:traditional', {
cost: minimumCost,
iteratorCount: optimizedIterators.length,
iterators: optimizedIterators.map((i: any) => ({
type: i.type.description,
cost: i.cost,
description: describeIterator(i)
}))
});
// Use K-merge intersection if all iterators are sortable
if (allSortable) {
return {
type: KMergedIntersection,
cost: minimumCost,
iterators: optimizedIterators,
isSortable: true
};
} else {
return {
type: IterateOverSubIterators,
operator: 'intersect',
cost: minimumCost,
iterators: optimizedIterators,
isSortable: false
};
}
}
export function buildUnionIterator(conditions: Condition[], parameters: QueryParameters, indexes: IndexMap, stats?: SingleQueryStats): DocumentMatchIteratorJoined|DocumentMatchIteratorAllDocuments {
const iteratorsToJoin = conditions.map(c => buildIteratorForGraph(c, parameters, indexes))
// Flatten nested unions and collect all iterators
const flattenedIterators: (DocumentMatchIteratorJoined|DocumentMatchIteratorUsingIndex|DocumentMatchIteratorUsingId)[] = []
let totalCost = 0
for (const iterator of iteratorsToJoin) {
if (isIteratorOverAllDocuments(iterator)) {
stats?.mark('union-iterator:all-documents', { cost: Number.MAX_SAFE_INTEGER })
return iterator
}
if (isIteratorOverNoDocuments(iterator)) {
// Skip empty iterators in union
continue
}
// Flatten nested unions
if (isIteratorUsingJoinsOfIterators(iterator) && iterator.operator === 'union') {
flattenedIterators.push(...iterator.iterators)
totalCost += iterator.cost
} else {
flattenedIterators.push(iterator as any)
totalCost += (iterator as any).cost
}
}
if (flattenedIterators.length === 0) {
stats?.mark('union-iterator:none')
return { type: IterateNoDocuments, cost: 0, isSortable: true }
}
if (flattenedIterators.length === 1) {
const singleIterator = flattenedIterators[0]
stats?.mark('union-iterator:single', { cost: totalCost, iterator: describeIterator(singleIterator) })
return singleIterator
}
// Check if we can use k-merge union (all iterators are sortable)
const allSortable = flattenedIterators.every(it => it.isSortable)
if (allSortable && flattenedIterators.length > 2) {
stats?.mark('union-iterator:k-merge', {
cost: totalCost,
iteratorCount: flattenedIterators.length,
iterators: flattenedIterators.map(i => ({ type: i.type.description, cost: i.cost }))
})
return {
type: KMergedUnion,
cost: totalCost,
iterators: flattenedIterators,
isSortable: true
}
}
// Fall back to traditional nested union
stats?.mark('union-iterator:traditional', {
cost: totalCost,
iteratorCount: flattenedIterators.length,
iterators: flattenedIterators.map(i => ({ type: i.type.description, cost: i.cost, description: describeIterator(i) }))
})
// Use K-merge union if all iterators are sortable
if (allSortable) {
return {
type: KMergedUnion,
cost: totalCost,
iterators: flattenedIterators,
isSortable: true
}
} else {
return {
type: IterateOverSubIterators,
operator: 'union',
cost: totalCost,
iterators: flattenedIterators,
isSortable: false
}
}
}
function* _enumerateSortedUnion(
iterators: DocumentMatchIterator[],
allDocuments: Map<string, Types.Document>,
stats?: SingleQueryStats
): IterableIterator<Types.Document> {
// Initialize cursors
type Cursor = { iter: IterableIterator<Types.Document>; value: Types.Document | undefined };
const cursors: Cursor[] = iterators
.map(it => {
const iter = _enumerateGraphIterator(it, allDocuments, stats);
const next = iter.next();
return { iter, value: next.done ? undefined : next.value };
})
.filter(c => c.value);
if (cursors.length === 0) return;
while (cursors.length > 0) {
// Find minimum ID
let vmin = cursors[0].value!.id;
for (const c of cursors) {
if (c.value!.id < vmin) vmin = c.value!.id;
}
// Yield the document
stats?.increment('enumerated:union:sorted')
yield cursors.find(c => c.value!.id === vmin)!.value!;
// Advance cursors at vmin
for (let i = 0; i < cursors.length; ) {
if (cursors[i].value!.id === vmin) {
const next = cursors[i].iter.next();
cursors[i].value = next.done ? undefined : next.value;
if (!cursors[i].value) cursors.splice(i, 1);
else i++;
} else {
i++;
}
}
}
}
export function* enumerateIntersectionOfIterators(iterators: DocumentMatchIterator[], allDocuments: Map<string, Types.Document>, stats: SingleQueryStats): IterableIterator<Types.Document> {
if (iterators.length <= 1) {
throw new Error('Unsupported number of iterators to intersect')
}
if (iterators.some(it => isIteratorOverNoDocuments(it))) {
stats?.mark('intersection-iterator:none')
return
}
// Check if we can optimize the intersection
if (iterators.length > 1 && iterators.every(it => isIteratorUsingIndexOfDocuments(it) && it.index.isBTree )) {
yield* _enumerateBTreeIntersection(
iterators as DocumentMatchIteratorUsingIndex[],
allDocuments,
stats
);
return;
}
const outerIterator = iterators[0]
const innerIterators = iterators.slice(1)
for (const document of _enumerateGraphIterator(outerIterator, allDocuments, stats)) {
let containedInAllOtherIterators = true
for (const innerIterator of innerIterators) {
if (isIteratorOverAllDocuments(innerIterator)) {
continue
}
if (isIteratorUsingIndexOfDocuments(innerIterator) ) {
if (!iteratorContains(innerIterator, document.id)) {
// We are handling an intersection so if this documentis not in this iterator
// it means it is not in the intersection (and we do not need to check the other iterators)
containedInAllOtherIterators = false
break
}
}
if (isIteratorUsingJoinsOfIterators(innerIterator)) {
let foundInIterator = false
for (const innerDocument of _enumerateGraphIterator(innerIterator, allDocuments, stats)) {
if (document.id === innerDocument.id) {
foundInIterator = true
break
}
}
if (!foundInIterator) {
containedInAllOtherIterators = false
break
}
}
}
if (containedInAllOtherIterators) {
stats?.increment('enumerated:intersection:set')
yield document
}
}
}
export function* enumerateUnionOfIterators(iterators: (DocumentMatchIteratorJoined|DocumentMatchIteratorUsingIndex|DocumentMatchIteratorUsingId)[], allDocuments: Map<string, Types.Document>, stats?: SingleQueryStats): IterableIterator<Types.Document> {
if (iterators.length > 1 && iterators.every(it => isIteratorUsingIndexOfDocuments(it) && it.index.isBTree)) {
yield* _enumerateBTreeUnion(
iterators as DocumentMatchIteratorUsingIndex[],
allDocuments,
stats
)
return
}
for (const iterator of iterators) {
for (const doc of _enumerateGraphIterator(iterator, allDocuments, stats)) {
stats?.increment('enumerated:union:set')
yield doc
}
}
}
/**
* Intersect a list of B-tree index iterators and enumerate the common documents.
*
* Preconditions
* • iterators.length ≥ 2
* • every iterator is { type: IterateOverIndex, set: BTree<string>, … }
* • index.isBTree === true (so set supports .minKey(), .maxKey() and .entries(from, holder))
*/
export function* _enumerateBTreeIntersection (
iterators : DocumentMatchIteratorUsingIndex[],
allDocs : Map<string, Types.Document>,
stats? : SingleQueryStats
): IterableIterator<Types.Document> {
/* ------------------------------------------------------------------ *
* 1. Early “no-overlap” check using [globalLo .. globalHi] *
* ------------------------------------------------------------------ */
let globalLo = '';
let globalHi = '\uffff';
for (const it of iterators) {
const bt = it.set as BTreeModule<string>; // BTree<string>
if (bt.size === 0) return; // empty ⇒ empty
const lo = bt.minKey() as string; // 1st key
const hi = bt.maxKey() as string; // last key
if (lo > globalLo) globalLo = lo;
if (hi < globalHi) globalHi = hi;
}
// ranges don’t overlap
if (globalLo > globalHi) {
stats?.mark('btree-intersect:empty');
return;
}
/* ------------------------------------------------------------------ *
* 2. Cursor initialisation *
* ------------------------------------------------------------------ */
type Cursor = {
bt : BTreeModule<string>, // the set
iter : Iterator<string>, // .entries() gives [k,v] but .keys() is simpler
value : string | undefined; // current doc-id
};
const cursors: Cursor[] = iterators.map(it => {
const bt = it.set as BTreeModule<string>;
/* `keys(from)` – sorted-btree v5+ helper that jumps to ≥ from *
* `entries(from,x)` – older API, holder[0] is the key */
let v: string | undefined;
let iter: Iterator<string>;
if (typeof (bt as any).keys === 'function') {
iter = (bt as any).keys(globalLo)[Symbol.iterator]();
v = (iter.next().value as string|undefined);
} else {
const holder: [string] = [undefined as unknown as string];
iter = (bt as any).entries(globalLo, holder);
v = iter.next().done ? undefined : holder[0];
}
return { bt, iter, value: v };
});
if (cursors.some(c => c.value === undefined)) return; // one set exhausted
const bTreeIntersectInfo = { k: cursors.length, loop: 0, globalLo, globalHi }
stats?.mark('btree-intersect:start', bTreeIntersectInfo);
/* ------------------------------------------------------------------ *
* 3. Multi-way merge/skip loop *
* ------------------------------------------------------------------ */
while (true) {
bTreeIntersectInfo.loop ++
// current maximum value among the cursors
let vmax = cursors[0].value as string;
for (let i = 1; i < cursors.length; ++i) {
const v = cursors[i].value as string;
if (v > vmax) vmax = v;
}
// advance every cursor whose value < vmax up to ≥ vmax
for (const c of cursors) {
while (c.value !== undefined && c.value < vmax) {
const n = c.iter.next();
c.value = n.done ? undefined : (n.value ?? (n as any).value); // handle entries(holder)
}
if (c.value === undefined) return; // one exhausted ⇒ done
if (c.value > vmax) vmax = c.value; // vmax may grow → restart outer loop
}
// if they all line up we found a common document-id
const aligned = cursors.every(c => c.value === vmax);
if (aligned) {
const doc = allDocs.get(vmax);
if (doc) {
stats?.increment('enumerated:intersection:btree')
yield doc;
}
// step all cursors once and continue
for (const c of cursors) {
const n = c.iter.next();
c.value = n.done ? undefined : (n.value ?? (n as any).value);
if (c.value === undefined) return; // exhausted
}
}
/* else: vmax changed, outer while() repeats */
}
}
function* _enumerateSortedIntersection(
iterators: DocumentMatchIterator[],
allDocuments: Map<string, Types.Document>,
stats?: SingleQueryStats
): IterableIterator<Types.Document> {
if (iterators.length < 2) return;
if (iterators.some(it => isIteratorOverNoDocuments(it))) return;
// Initialize cursors for each iterator
type Cursor = { iter: IterableIterator<Types.Document>; value: Types.Document | undefined };
const cursors: Cursor[] = iterators.map(it => {
const iter = _enumerateGraphIterator(it, allDocuments, stats);
const next = iter.next();
return { iter, value: next.done ? undefined : next.value };
});
if (cursors.some(c => !c.value)) return;
while (true) {
// Find maximum ID among cursors
let vmax = cursors[0].value!.id;
for (const c of cursors) {
if (c.value!.id > vmax) vmax = c.value!.id;
}
// Advance cursors < vmax
for (const c of cursors) {
while (c.value && c.value.id < vmax) {
const next = c.iter.next();
c.value = next.done ? undefined : next.value;
}
if (!c.value) return;
}
// Check if all cursors align
if (cursors.every(c => c.value!.id === vmax)) {
stats?.increment('enumerated:intersection:sorted')
yield cursors[0].value!;
for (const c of cursors) {
const next = c.iter.next();
c.value = next.done ? undefined : next.value;
}
if (cursors.some(c => !c.value)) return;
}
}
}
/**
* Enumerate the union of documents from a list of B-tree index iterators.
*
* Preconditions
* • iterators.length ≥ 1
* • every iterator is { type: IterateOverIndex, set: BTree<string>, … }
* • index.isBTree === true (so set supports .minKey(), .maxKey() and .entries(from, holder))
*/
export function* _enumerateBTreeUnion(
iterators: DocumentMatchIteratorUsingIndex[],
allDocs: Map<string, Types.Document>,
stats?: SingleQueryStats
): IterableIterator<Types.Document> {
/* ------------------------------------------------------------------ *
* 1. Handle edge cases *
* ------------------------------------------------------------------ */
if (iterators.length === 0) return;
/* ------------------------------------------------------------------ *
* 2. Cursor initialization *
* ------------------------------------------------------------------ */
type Cursor = {
bt: BTreeModule<string>, // the set
iter: Iterator<string>, // .entries() gives [k,v] but .keys() is simpler
value: string | undefined // current doc-id
}
const cursors: Cursor[] = iterators.map((it) => {
const bt = it.set as BTreeModule<string>
let v: string | undefined
let iter: Iterator<string>
if (typeof (bt as any).keys === 'function') {
iter = (bt as any).keys()[Symbol.iterator]()
v = iter.next().value as string | undefined
} else {
const holder: [string] = [undefined as unknown as string]
iter = (bt as any).entries(undefined, holder)
v = iter.next().done ? undefined : holder[0]
}
return { bt, iter, value: v }
})
// Filter out exhausted cursors (empty sets)
const activeCursors = cursors.filter((c) => c.value !== undefined)
if (activeCursors.length === 0) return
const bTreeUnionInfo = { k: cursors.length, loop: 0 }
stats?.mark('btree-union:start', bTreeUnionInfo)
/* ------------------------------------------------------------------ *
* 3. Multi-way merge loop *
* ------------------------------------------------------------------ */
while (activeCursors.length > 0) {
bTreeUnionInfo.loop++
// Find the minimum value among active cursors
let vmin = activeCursors[0].value as string
for (let i = 1; i < activeCursors.length; i++) {
const v = activeCursors[i].value as string
if (v < vmin) vmin = v
}
// Yield the document for vmin if it exists
const doc = allDocs.get(vmin)
if (doc) {
stats?.increment('enumerated:union:btree')
yield doc
}
// Advance all cursors that are at vmin
for (let i = 0; i < activeCursors.length; ) {
const c = activeCursors[i]
if (c.value === vmin) {
const n = c.iter.next()
c.value = n.done ? undefined : n.value ?? (n as any).value
if (c.value === undefined) {
// Remove exhausted cursor
activeCursors.splice(i, 1)
} else {
i++
}
} else {
i++
}
}
}
}
Indexing
Takes a query graph and allows to test documents against it to find out whether a document matches the query or not.
query-indexer.spec.spec.js and document-index.ts
ts
import * as Types from '@aeppic/types'
/*
* TS believes BTreeModule has a default export (which is true) and correctly
* want to import that. NodeJS though (when just running through a TS remover
* as ts-node (transpile) via ava does it), imports the module.exports
* thus the types don't match. Recasting here via explicit default property
* access to make it match
*/
import BTreeModule from 'sorted-btree'
const BTree: typeof BTreeModule = (BTreeModule as any).default ?? BTreeModule
export interface IndexInclusionFilter {
(entry: Types.Document): boolean
}
export interface GeneralIndexOptions {
inclusionFilter?: IndexInclusionFilter
useBTree?: boolean // Added option to use BTree
}
export interface PrefixTextIndexOptions extends GeneralIndexOptions { // Ensure it extends GeneralIndexOptions
type: 'prefix'
length: number
}
export type IndexOptions = GeneralIndexOptions | PrefixTextIndexOptions // Simplified, as PrefixTextIndexOptions now includes GeneralIndexOptions fields
export type IndexMap = { [fieldPath: string]: Index };
// Utility type guard for BTree
function isBTree<K, V1, V2>(mapOrBTree: Map<K, V1> | BTreeModule<K, V2>): mapOrBTree is BTreeModule<K, V2> {
return mapOrBTree instanceof BTree;
}
export class Index {
private _valuesToDocumentIds: Map<any, Set<Types.DocumentId>> | BTreeModule<any, BTreeModule<Types.DocumentId>>; // Type can be Map or BTree
private _documentIdsToValues: Map<Types.DocumentId, any[]> | BTreeModule<Types.DocumentId, any[]>;
private _isSuperSet = false
private _prefixLength: number = null
private _isBTree: boolean = false
constructor(private _fieldPath: string, private _options: IndexOptions = {}) {
if (this._options.useBTree) {
this._valuesToDocumentIds = new BTree<any, BTreeModule<string>>();
this._documentIdsToValues = new BTree<Types.DocumentId, any[]>();
this._isBTree = true
} else {
this._valuesToDocumentIds = new Map<any, Set<string>>();
this._documentIdsToValues = new Map<Types.DocumentId, any[]>();
this._isBTree = false
}
if ('type' in this._options) {
switch (this._options.type) {
case 'prefix':
this._isSuperSet = true
this._prefixLength = this._options.length
break
}
}
}
get isBTree() {
return this._isBTree
}
get fieldPath() {
return this._fieldPath
}
get prefixLength() {
return this._prefixLength
}
public get isSuperSet() {
return this._isSuperSet
}
public size() {
return this._valuesToDocumentIds.size
}
public entryCount() {
return this._documentIdsToValues.size
}
public addIfMatches(entry: Types.Document) {
if (this._options.inclusionFilter) {
if (!this._options.inclusionFilter(entry)) {
return
}
}
const fieldValue = resolveField(entry, this._fieldPath)
if (fieldValue != null && (typeof fieldValue === 'string' ? fieldValue != '' : true)) {
this._add(fieldValue, entry)
}
}
has(id: string) {
return this._documentIdsToValues.has(id)
}
private _add(value: any, entry: Types.Document) {
const mappedValues = Array.isArray(value) ?
value.map(v => this._mapValue(v)) :
[this._mapValue(value)]
for (const mappedValue of mappedValues) {
this._addSingleValue(mappedValue, entry)
}
this._documentIdsToValues.set(entry.id, mappedValues)
}
private _addSingleValue(value: any, entry: Types.Document) {
if (value == null) {
return
}
const list = this._getEntryList(value)
if (this.isBTree) {
(list as BTreeModule<any, BTreeModule<string>>).set(entry.id, null)
} else {
(list as any).add(entry.id)
}
}
public remove(id: string) {
const values = this._documentIdsToValues.get(id)
this._documentIdsToValues.delete(id)
if (values == null) {
return
}
for (const singleMappedValue of values) {
this._removeSingleValue(singleMappedValue, id)
}
}
*all() {
yield *this._documentIdsToValues.keys()
}
private _removeSingleValue(singleValue: any, entryId: string) {
const directEntries = this._valuesToDocumentIds.get(singleValue);
if (directEntries) {
directEntries.delete(entryId)
if (directEntries.size === 0) {
this._valuesToDocumentIds.delete(singleValue)
}
}
}
private _getEntryList(value: any) {
let entries = this._valuesToDocumentIds.get(value)
if (!entries) {
entries = this.isBTree ? new BTree<Types.DocumentId> : new Set<Types.DocumentId>()
this._valuesToDocumentIds.set(value, entries as any)
}
return entries
}
private _mapValue(value: any) {
if (value == null ) {
return null
}
if (typeof value !== 'string') {
value = value.toString()
}
if ('type' in this._options) {
switch (this._options.type) {
case 'prefix':
if (value.length > this._options.length) {
return value.substr(0, this._options.length).toLowerCase()
} else {
return value.toLowerCase()
}
}
}
return value
}
public getDocumentIdsForValue(value: any): Set<Types.DocumentId>|BTreeModule<Types.DocumentId> {
const mappedValue = this._mapValue(value)
if (mappedValue == null) {
return new Set()
}
return this._valuesToDocumentIds.get(mappedValue)
}
public getEntriesForRange(minValue: any, maxValue: any): Array<[key: any, entries: Set<Types.DocumentId>|BTreeModule<Types.DocumentId>]> {
const mappedMin = this._mapValue(minValue);
const mappedMax = this._mapValue(maxValue);
const matchingEntriesWithKeys: Array<[key: any, entries: Set<Types.DocumentId>]> = [];
if (isBTree(this._valuesToDocumentIds)) {
// Use BTree's range iteration
const entry: [any, Set<Types.DocumentId>] = [undefined, undefined];
const iterator = this._valuesToDocumentIds.entries(mappedMin, entry);
for (;;) {
const { done } = iterator.next();
if (done) {
break;
}
const currentKey = entry[0];
const currentSet = entry[1];
// Stop if currentKey exceeds maxValue (inclusive range check)
if (mappedMax != null && currentKey > mappedMax) {
break;
}
// BTree iterator starts at or after mappedMin. Key must be <= mappedMax.
if (currentSet && currentSet.size > 0) {
matchingEntriesWithKeys.push([currentKey, currentSet]);
}
}
} else {
// For Map-based storage, sort keys on each call
const sortedValues = Array.from(this._valuesToDocumentIds.keys());
sortedValues.sort((a, b) => {
if (a < b) return -1;
if (a > b) return 1;
return 0;
});
for (const val of sortedValues) {
// Assuming val is already mapped if it came from _valuesToDocumentIds keys (which are mapped)
if (val >= mappedMin && val <= mappedMax) {
const documentIds = this._valuesToDocumentIds.get(val);
if (documentIds && documentIds.size > 0) {
matchingEntriesWithKeys.push([val, documentIds]);
}
} else if (val > mappedMax) {
// Optimization: if current value exceeds max, subsequent values also will.
break;
}
}
}
return matchingEntriesWithKeys;
}
}
// Utility function to resolve a field in an object
//
// It will accept a path like 'a.b.c' and return the value of object.a.b.c
// If the value is an array, it will return an array of values for each element
// It does not support deeper arrays.
//
// E.g. given the object:
//
// ```json
// {
// a: 'A',
// data: {
// b: 'B',
// c: 4,
// d: {
// id: 'E',
// text: 'F',
// },
// e: [
// { id: 'G', text: 'H' },
// { id: 'I', text: 'J' },
// ],
// },
// }
// ```
//
// Then the following calls will return:
//
// resolveField(object, 'a') => 'A'
// resolveField(object, 'data.b') => 'B'
// resolveField(object, 'data.c') => 4
// resolveField(object, 'data.d.id') => 'E'
// resolveField(object, 'data.d.text') => 'F'
// resolveField(object, 'data.e.id') => ['G', 'I']
//
// This is specific to the way aeppic documents and their data fields are structured
//
function resolveField(object: object, path: string) {
const parts = path.split('.')
let value: any = object
for (const part of parts) {
if (value == null) {
return undefined
}
if (Array.isArray(value)) {
return value.map(v => v[part])
} else {
value = value[part]
}
}
if (value === object) {
value = undefined
}
return value
}
export function intersectSets(sets: Set<string>[]): string[] {
const sortedSets = new Array<Set<string>>(sets.length)
for (let i = 0; i < sets.length; i ++) {
const set = sets[i]
if (!set || set.size === 0) {
return []
}
sortedSets[i] = set
}
if (sortedSets.length === 0) {
return []
}
sortedSets.sort((a, b) => a.size - b.size)
const intersectionEnumerator = intersect(sortedSets)
return Array.from(intersectionEnumerator)
}
function intersect<T>(sets: Set<T>[]): Set<T> {
if (sets.length === 0) {
return
} else if (sets.length === 1) {
return sets[0]
} else {
const set = sets.shift()
const intersected = new Set<T>()
for (const key of set) {
let isInLowerSets = true
for (const set of sets) {
if (!set.has(key)) {
isInLowerSets = false
break
}
}
if (isInLowerSets) {
intersected.add(key)
}
}
return intersected
}
}
js
'use strict'
const test = require('tape')
const { DocumentIndexer } = require('../../dist/aeppic.cjs').Indexing
test('Build an indexer', (t) => {
t.plan(1)
t.ok(new DocumentIndexer())
})
test('Setup up an index', (t) => {
t.plan(1)
const indexer = new DocumentIndexer()
indexer.createIndex('id')
t.pass('setting up an index works')
})
test('Add a document to the indexer', (t) => {
t.plan(1)
const indexer = new DocumentIndexer()
indexer.add({ id: 'abc', data: { a: 1, b: 'hallo' } })
t.pass('adding documents')
})
test.skip('Access an index', (t) => {
t.plan(2)
const MATCHING_DOC_1 = { id: '1' }
const indexer = new DocumentIndexer()
indexer.createIndex('id')
indexer.add(MATCHING_DOC_1)
const docs = indexer.getMatchingDocuments('id', '1')
t.ok(Array.isArray(docs), 'Returns an array')
t.equal(docs[0], MATCHING_DOC_1.id)
})
test.skip('Use an index to add a document with an indexed array of string values', (t) => {
t.plan(4)
const MATCHING_DOC_1 = { id: '1', a: ['A', 'B', 'C'] }
const indexer = new DocumentIndexer()
indexer.createIndex('a')
indexer.add(MATCHING_DOC_1)
const docs = indexer.getMatchingDocuments('a', 'A')
t.equal(docs[0], MATCHING_DOC_1.id)
const docsByB = indexer.getMatchingDocuments('a', 'B')
t.equal(docsByB[0], MATCHING_DOC_1.id)
const docsByWrongValue = indexer.getMatchingDocuments('a', 'X')
t.equal(docsByWrongValue.length, 0)
indexer.remove(MATCHING_DOC_1)
const docsAfterRemoval = indexer.getMatchingDocuments('a', 'C')
t.equal(docsAfterRemoval.length, 0)
})
test.skip('An index returns objects added matching the index path', (t) => {
t.plan(4)
const MATCHING_DOC_1 = { id: '1', kind: 'abc' }
const MATCHING_DOC_2 = { id: '2', kind: 'abc' }
const NON_MATCHING_DOC = { id: '3', kind: 'xyz' }
const indexer = new DocumentIndexer()
indexer.createIndex('kind')
indexer.add(MATCHING_DOC_1)
indexer.add(MATCHING_DOC_2)
indexer.add(NON_MATCHING_DOC)
const docs = indexer.getMatchingDocuments('kind', 'abc')
t.equal(docs.length, 2, 'Result in two documents returned')
t.ok(contains(docs, MATCHING_DOC_1), 'Contains matching doc #1')
t.ok(contains(docs, MATCHING_DOC_2), 'Contains matching doc #2')
t.notOk(contains(docs, NON_MATCHING_DOC), 'Does not contain non-matches')
})
test.skip('An index returns objects added matching the index path with references too', (t) => {
t.plan(10)
const MATCHING_DOC_1 = { id: '1', nested: { kind: { id: 'abc', v: 'initial', text: '' } }, f: { id: 'A' } }
const MATCHING_DOC_2 = { id: '2', nested: { kind: { id: 'abc', v: 'initial', text: '' } }, f: { id: 'A' } }
const MATCHING_DOC_3 = { id: '3', nested: { kind: [{ id: 'abc', v: 'initial', text: '' }, { id: 'abc', v: 'initial', text: '' }] }, f: { id: 'A' } }
const MATCHING_DOC_4 = { id: '4', nested: { kind: [{ id: 'cde', v: 'initial', text: '' }, { id: 'abc', v: 'initial', text: '' }] }, f: { id: 'A' } }
const NON_MATCHING_DOC_1 = { id: '11', nested: { kind: { id: 'abc', v: 'initial', text: '' } }, f: { id: 'B' } }
const NON_MATCHING_DOC_2 = { id: '12', nested: { kind: { id: 'xyz', v: 'initial', text: '' } } }
const NON_MATCHING_DOC_3 = { id: '13', nested: { kind: [{ id: 'xyz', v: 'initial', text: '' }] } }
const indexer = new DocumentIndexer()
indexer.createIndex('nested.kind.id', { inclusionFilter: d => d.f && d.f.id === 'A' })
indexer.add(MATCHING_DOC_1)
indexer.add(MATCHING_DOC_2)
indexer.add(MATCHING_DOC_3)
indexer.add(MATCHING_DOC_4)
indexer.add(NON_MATCHING_DOC_1)
indexer.add(NON_MATCHING_DOC_2)
indexer.add(NON_MATCHING_DOC_3)
const docs = indexer.getMatchingDocuments('nested.kind.id', 'abc')
t.equal(docs.length, 4, 'Results in four documents returned')
t.ok(contains(docs, MATCHING_DOC_1), 'Contains matching doc #1')
t.ok(contains(docs, MATCHING_DOC_2), 'Contains matching doc #2')
t.ok(contains(docs, MATCHING_DOC_3), 'Contains matching doc #3')
t.ok(contains(docs, MATCHING_DOC_4), 'Contains matching doc #4')
t.notOk(contains(docs, NON_MATCHING_DOC_1), 'Does not contain non-matches (form does not match)')
t.notOk(contains(docs, NON_MATCHING_DOC_2), 'Does not contain non-matches (kind.id does not match)')
t.notOk(contains(docs, NON_MATCHING_DOC_3), 'Does not contain non-matches (kind.id in array does not match)')
const otherDocs = indexer.getMatchingDocuments('nested.kind.id', 'cde')
t.equal(otherDocs.length, 1, 'Results in one document returned')
t.ok(contains(otherDocs, MATCHING_DOC_4), 'Contains matching doc #4 since it also matches `cde`')
})
test.skip('An index can correctly remove entries with arrays', (t) => {
t.plan(8)
const MATCHING_DOC_1 = { id: '1', nested: { kind: { id: 'abc', v: 'initial', text: '' } }, f: { id: 'A' } }
const MATCHING_DOC_2 = { id: '2', nested: { kind: { id: 'abc', v: 'initial', text: '' } }, f: { id: 'A' } }
const MATCHING_DOC_3 = { id: '3', nested: { kind: [{ id: 'abc', v: 'initial', text: '' }, { id: 'abc', v: 'initial', text: '' }] }, f: { id: 'A' } }
const MATCHING_DOC_4 = { id: '4', nested: { kind: [{ id: 'cde', v: 'initial', text: '' }, { id: 'abc', v: 'initial', text: '' }] }, f: { id: 'A' } }
const indexer = new DocumentIndexer()
indexer.createIndex('nested.kind.id', { inclusionFilter: d => d.f && d.f.id === 'A' })
const emptyDocs = indexer.getMatchingDocuments('nested.kind.id', 'zzz')
t.equal(emptyDocs.length, 0, 'No matches initially')
indexer.add(MATCHING_DOC_1)
indexer.add(MATCHING_DOC_2)
indexer.add(MATCHING_DOC_3)
indexer.add(MATCHING_DOC_4)
const docs = indexer.getMatchingDocuments('nested.kind.id', 'abc')
t.equal(docs.length, 4, 'Four at first')
const otherDocs = indexer.getMatchingDocuments('nested.kind.id', 'cde')
t.equal(otherDocs.length, 1, 'One match')
t.ok(contains(otherDocs, MATCHING_DOC_4), 'Contains matching doc #4')
indexer.remove(MATCHING_DOC_3)
const docsAfterRemovingDoc3 = indexer.getMatchingDocuments('nested.kind.id', 'abc')
t.equal(docsAfterRemovingDoc3.length, 3, 'Only three left')
indexer.remove(MATCHING_DOC_4)
const docsAfterRemovingDoc4 = indexer.getMatchingDocuments('nested.kind.id', 'abc')
t.equal(docsAfterRemovingDoc4.length, 2, 'Only two left')
const docsAfterRemovingDoc4AndLookingForOtherValue = indexer.getMatchingDocuments('nested.kind.id', 'cde')
t.equal(docsAfterRemovingDoc4AndLookingForOtherValue.length, 0, 'Empty now')
indexer.remove(MATCHING_DOC_1)
indexer.remove(MATCHING_DOC_2)
const docsAfterRemovingAll = indexer.getMatchingDocuments('nested.kind.id', 'abc')
t.equal(docsAfterRemovingAll.length, 0, 'All empty now')
})
test.skip('Removing documents', (t) => {
t.plan(4)
const MATCHING_DOC_1 = { id: 1, f: { id: 'abc' } }
const MATCHING_DOC_2 = { id: 2, f: { id: 'abc' } }
const NON_MATCHING_DOC = { id: 3, f: { id: 'xyz' } }
const indexer = new DocumentIndexer()
indexer.createIndex('f.id')
indexer.add(MATCHING_DOC_1)
indexer.add(MATCHING_DOC_2)
indexer.add(NON_MATCHING_DOC)
indexer.remove(MATCHING_DOC_2)
const docs = indexer.getMatchingDocuments('f.id', 'abc')
t.ok(docs.length, 1, 'Result in one document returned')
t.ok(contains(docs, MATCHING_DOC_1), 'Contains matching doc #1')
t.notOk(contains(docs, MATCHING_DOC_2), 'No longer contains matching doc #2')
t.notOk(contains(docs, NON_MATCHING_DOC), 'Does not contain non-matches')
})
test.skip('Finding most specific index based on input', (t) => {
t.plan(6)
const DOC_ABC_1 = { id: 1, f: { id: 'abc' }, data: { status: 0 } }
const DOC_ABC_2 = { id: 2, f: { id: 'abc' }, data: { status: 1 } }
const DOC_ABC_3 = { id: 3, f: { id: 'abc' }, data: { status: 2 } }
const DOC_XYZ_1 = { id: 4, f: { id: 'xyz' }, data: { status: 2 } }
const DOC_XYZ_2 = { id: 5, f: { id: 'xyz' }, data: { status: 1 } }
const indexer = new DocumentIndexer()
indexer.createIndex('f.id')
indexer.createIndex('data.status')
indexer.add(DOC_ABC_1, DOC_ABC_2, DOC_ABC_3)
indexer.add(DOC_XYZ_1, DOC_XYZ_2)
const docSet = indexer.getShortestFilteredSet({ 'f.id': 'abc', 'data.status': 2 })
const docs = Array.from(docSet.values())
t.equal(docs.length, 2, 'Should use status index which contains only two documents with value 2')
t.notOk(contains(docs, DOC_ABC_1), 'Does not contain ABC_1 since it is not part of the specified status index')
t.notOk(contains(docs, DOC_ABC_2), 'Does not contain ABC_2 since it is not part of the specified status index')
t.ok(contains(docs, DOC_ABC_3), 'Contains ABC_3')
t.ok(contains(docs, DOC_XYZ_1), 'Contains XYZ_1 since it matches status=2 even though it does not match f.id=abc')
t.notOk(contains(docs, DOC_XYZ_2), 'Does not contain XYZ_2 since it does not match status')
})
test.skip('Finding most specific index based on input', (t) => {
t.plan(6)
const DOC_ABC_1 = { id: 1, f: { id: 'abc' }, data: { status: 0 } }
const DOC_ABC_2 = { id: 2, f: { id: 'abc' }, data: { status: 1 } }
const DOC_ABC_3 = { id: 3, f: { id: 'abc' }, data: { status: 2 } }
const DOC_XYZ_1 = { id: 4, f: { id: 'xyz' }, data: { status: 2 } }
const DOC_XYZ_2 = { id: 5, f: { id: 'xyz' }, data: { status: 1 } }
const indexer = new DocumentIndexer()
indexer.createIndex('f.id')
indexer.createIndex('data.status')
indexer.add(DOC_ABC_1, DOC_ABC_2, DOC_ABC_3)
indexer.add(DOC_XYZ_1, DOC_XYZ_2)
const joinedResults = indexer.enumerate({ 'f.id': 'abc', 'data.status': 2 })
const docs = Array.from(joinedResults)
t.ok(Array.from(docs), 1, 'Should use both indexes')
t.notOk(contains(docs, DOC_ABC_1), 'Does not contain ABC_1 since it is not part of the specified status index')
t.notOk(contains(docs, DOC_ABC_2), 'Does not contain ABC_2 since it is not part of the specified status index')
t.ok(contains(docs, DOC_ABC_3), 'Contains ABC_3')
t.notOk(contains(docs, DOC_XYZ_1), 'Does not contain XYZ_1 even though it matches status=2 because it does not match f.id=abc')
t.notOk(contains(docs, DOC_XYZ_2), 'Does not contain XYZ_2 since it does not match status')
})
test.skip('Finding most specific index based on input when input contains NON indexed fields too', (t) => {
t.plan(7)
const DOC_ABC_1 = { id: 1, f: { id: 'abc' }, data: { status: 0, name: 'a' } }
const DOC_ABC_2 = { id: 2, f: { id: 'abc' }, data: { status: 1, name: 'b' } }
const DOC_ABC_3 = { id: 3, f: { id: 'abc' }, data: { status: 2, name: 'c' } }
const DOC_XYZ_1 = { id: 4, f: { id: 'xyz' }, data: { status: 2, name: 'e' } }
const DOC_XYZ_2 = { id: 5, f: { id: 'xyz' }, data: { status: 1, name: 'e' } }
const indexer = new DocumentIndexer()
indexer.createIndex('f.id')
indexer.createIndex('data.status')
indexer.add(DOC_ABC_1, DOC_ABC_2, DOC_ABC_3)
indexer.add(DOC_XYZ_1, DOC_XYZ_2)
const elemMatchQuery = { 'f.id': 'abc', 'data.status': 2, 'data.name': 'e' }
const canEnumerate = indexer.canEnumerate(elemMatchQuery)
t.ok(canEnumerate, 'The indexer must agree that it can enumerate this')
const joinedResults = indexer.enumerate(elemMatchQuery)
const docs = Array.from(joinedResults)
t.equal(Array.from(docs).length, 1, 'Should use both indexes')
t.notOk(contains(docs, DOC_ABC_1), 'Does not contain ABC_1 since it is not part of the specified status index')
t.notOk(contains(docs, DOC_ABC_2), 'Does not contain ABC_2 since it is not part of the specified status index')
t.ok(contains(docs, DOC_ABC_3), 'Contains ABC_3 (event though the indexer.enumerate call includes data.name:e since it is not indexed')
t.notOk(contains(docs, DOC_XYZ_1), 'Does not contain XYZ_1 even though it matches status=2 because it does not match f.id=abc')
t.notOk(contains(docs, DOC_XYZ_2), 'Does not contain XYZ_2 since it does not match status')
})
test.skip('Use indexes even when no entries yet matching (empty set)', (t) => {
t.plan(2)
const DOC_ABC_1 = { id: 1, f: { id: 'abc' }, data: { status: 0, name: 'a' } }
const DOC_ABC_2 = { id: 2, f: { id: 'abc' }, data: { status: 1, name: 'b' } }
const DOC_ABC_3 = { id: 3, f: { id: 'abc' }, data: { status: 2, name: 'c' } }
const DOC_XYZ_1 = { id: 4, f: { id: 'xyz' }, data: { status: 2, name: 'e' } }
const DOC_XYZ_2 = { id: 5, f: { id: 'xyz' }, data: { status: 1, name: 'e' } }
const indexer = new DocumentIndexer()
indexer.createIndex('f.id')
indexer.createIndex('data.status')
indexer.add(DOC_ABC_1, DOC_ABC_2, DOC_ABC_3)
indexer.add(DOC_XYZ_1, DOC_XYZ_2)
const elemMatchQuery = { 'f.id': 'MNO', 'data.status': 2, 'data.name': 'e' }
const canEnumerate = indexer.canEnumerate(elemMatchQuery)
t.ok(canEnumerate, 'The indexer must agree that it can enumerate this')
const joinedResults = indexer.enumerate(elemMatchQuery)
const docs = Array.from(joinedResults)
t.equal(Array.from(docs).length, 0, 'Should not return any match (there is no entry with f.id:NMO')
})
test.skip('An index can be a prefixed text index', (t) => {
t.plan(7)
const MATCHING_DOC_1 = { id: '1', kind: 'abcDEF' }
const MATCHING_DOC_2 = { id: '2', kind: 'abcXZY' }
const NON_MATCHING_DOC_1 = { id: '3', kind: 'xyz' }
const NON_MATCHING_DOC_2 = { id: '4', kind: 'ab' }
const indexer = new DocumentIndexer()
indexer.createIndex('kind', { type: 'prefix', length: 3 })
indexer.add(MATCHING_DOC_1)
indexer.add(MATCHING_DOC_2)
indexer.add(NON_MATCHING_DOC_1)
indexer.add(NON_MATCHING_DOC_2)
const docs = indexer.getMatchingDocuments('kind', 'abc')
t.equal(docs.length, 2, 'Result in two documents returned')
t.ok(contains(docs, MATCHING_DOC_1), 'Contains matching doc #1')
t.ok(contains(docs, MATCHING_DOC_2), 'Contains matching doc #2')
t.notOk(contains(docs, NON_MATCHING_DOC_1), 'Does not contain non-matches')
t.notOk(contains(docs, NON_MATCHING_DOC_2), 'Does not contain non-matches')
const supersetDocs = indexer.getMatchingDocuments('kind', 'abcDEF')
t.equal(supersetDocs.length, 2, 'Result in two documents returned as it is a superset')
const canEnumerate = indexer.canEnumerate({ kind: 'abcd' })
t.ok(canEnumerate)
})
function contains(arrayOfItemIds, item) {
for (const e of arrayOfItemIds) {
if (e === item.id) {
return true
}
}
return false
}