*/
import React, { Component } from 'react';
import lunr from 'lunr';
+import { sortBy } from 'lodash';
import ClearIcon from './icons/ClearIcon';
import { getUrlsList } from '../utils';
super(props);
this.state = { value: '' };
this.index = lunr(function() {
+ this.use(tokenContextPlugin);
this.ref('id');
this.field('title', { boost: 10 });
this.field('text');
- this.metadataWhitelist = ['position'];
+ this.metadataWhitelist = ['position', 'tokenContext'];
props.pages
.filter(page =>
}
getFormattedResults = (query, results) => {
- return results.map(match => {
+ const formattedResults = results.map(match => {
const page = this.props.pages.find(page => page.id === match.ref);
const highlights = {};
let longestTerm = '';
+ let exactMatch = false;
- // remember the longest term that matches the query *exactly*
+ // Loop over all matching terms/tokens.
Object.keys(match.matchData.metadata).forEach(term => {
- if (query.toLowerCase().includes(term.toLowerCase()) && longestTerm.length < term.length) {
+ // Remember the longest term that matches the query as close as possible.
+ if (query.includes(term.toLowerCase()) && longestTerm.length < term.length) {
longestTerm = term;
}
Object.keys(match.matchData.metadata[term]).forEach(fieldName => {
- const { position: positions } = match.matchData.metadata[term][fieldName];
+ const { position: positions, tokenContext: tokenContexts } = match.matchData.metadata[
+ term
+ ][fieldName];
+
highlights[fieldName] = [...(highlights[fieldName] || []), ...positions];
+
+ // Check if we have an *exact match*.
+ if (!exactMatch && tokenContexts) {
+ tokenContexts.forEach(tokenContext => {
+ if (!exactMatch && tokenContext.includes(query)) {
+ exactMatch = true;
+ }
+ });
+ }
});
});
title: page.frontmatter.title,
url: page.frontmatter.url || page.fields.slug
},
+ exactMatch,
highlights,
+ query,
longestTerm
};
});
+
+ // Re-order results by the length of the longest matched term and by exact
+ // match (if applicable). The longer the matched term is, the higher the
+ // chance the result is more relevant.
+ return sortBy(
+ // Sort by longest term.
+ sortBy(formattedResults, result => -result.longestTerm.length),
+ // Sort by exact match.
+ result => result.exactMatch && -1
+ );
};
handleClear = event => {
);
}
}
+
+// Lunr doesn't support exact multiple-term matching. Meaning "foo bar" will not
+// boost a sentence like "Foo bar baz" more than "Baz bar foo". In order to
+// provide more accurate results, we store the token context, to see if we can
+// perform an "exact match". Unfortunately, we cannot extend the search logic,
+// only the tokenizer at *index time*. This is why we store the context as
+// meta-data, and post-process the matches before rendering (see above). For
+// performance reasons, we only add 2 extra tokens, one in front, one after.
+// This means we support "exact matching" for up to 3 terms. More search terms
+// would fall back to the regular matching algorithm, which is OK: the more terms
+// searched for, the better the standard algorithm will perform anyway. In the
+// end, the best would be for Lunr to support multi-term matching, as extending
+// the search algorithm for this would be way too complicated.
+function tokenContextPlugin(builder) { // Lunr plugin: tags each token with its neighbouring tokens under the metadata key 'tokenContext'.
+ const pipelineFunction = (token, index, tokens) => {
+ const prevToken = tokens[index - 1] || ''; // '' when there is no preceding token
+ const nextToken = tokens[index + 1] || ''; // '' when there is no following token
+ token.metadata['tokenContext'] = [prevToken.toString(), token.toString(), nextToken.toString()]
+ .filter(s => s.length) // drop the '' placeholders so the joined string has no stray spaces
+ .join(' ')
+ .toLowerCase(); // stored lower-cased as a single "prev current next" string
+ return token; // same token object is passed on, now carrying the extra metadata
+ };
+
+ lunr.Pipeline.registerFunction(pipelineFunction, 'tokenContext'); // register the function under the label 'tokenContext'
+ builder.pipeline.before(lunr.stemmer, pipelineFunction); // insert it ahead of lunr.stemmer in the builder's pipeline
+ builder.metadataWhitelist.push('tokenContext'); // add the key to the builder's metadata whitelist
+}
ref(field: string): void;
+ use(fn: Function): void;
+
metadataWhitelist?: string[];
}
+ export interface LunrBuilder { // Shape of the builder object that a plugin function receives.
+ pipeline: any; // untyped; plugins call pipeline.before(...) on it
+ metadataWhitelist: string[]; // metadata keys; plugins push additional keys onto it
+ }
+
+ export interface LunrIndex { // Object returned by lunr(initializer).
+ search(query: string): LunrMatch[]; // takes a query string and returns the resulting matches
+ }
+
export interface LunrInit { // Initializer callback type accepted by lunr(...).
(this: Lunr): void; // called with `this` typed as Lunr, so the callback configures the index via this.ref / this.field / this.use
}
matchData: { metadata: any };
}
- export interface LunrIndex {
- search(query: string): LunrMatch[];
+ export interface LunrToken { // Token shape seen by pipeline functions.
+ str: string;
+ metadata: any; // untyped bag; pipeline functions may attach entries such as metadata['tokenContext']
}
function lunr(initializer: LunrInit): LunrIndex;
import DocMarkdownBlock from '../../../components/docs/DocMarkdownBlock';
import { translate } from '../../../helpers/l10n';
import { isSonarCloud } from '../../../helpers/system';
+import { addSideBarClass, removeSideBarClass } from '../../../helpers/pages';
import { DocsNavigationItem } from '../utils';
import '../styles.css';
pages = getPages();
componentDidMount() {
- const footer = document.getElementById('footer');
- if (footer) {
- footer.classList.add('page-footer-with-sidebar', 'documentation-footer');
- }
+ addSideBarClass();
}
componentWillUnmount() {
- const footer = document.getElementById('footer');
- if (footer) {
- footer.classList.remove('page-footer-with-sidebar', 'documentation-footer');
- }
+ removeSideBarClass();
}
render() {
import { highlightMarks, cutWords, DocumentationEntry } from '../utils';
export interface SearchResult {
+ exactMatch?: boolean;
highlights: { [field: string]: [number, number][] };
longestTerm: string;
page: DocumentationEntry;
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
import * as React from 'react';
-import lunr, { LunrIndex } from 'lunr';
+import lunr, { LunrBuilder, LunrIndex, LunrToken } from 'lunr';
import { sortBy } from 'lodash';
import SearchResultEntry, { SearchResult } from './SearchResultEntry';
import { DocumentationEntry, getUrlsList, DocsNavigationItem } from '../utils';
constructor(props: Props) {
super(props);
this.index = lunr(function() {
+ this.use(tokenContextPlugin);
this.ref('relativeName');
this.field('title', { boost: 10 });
this.field('text');
- this.metadataWhitelist = ['position'];
+ this.metadataWhitelist = ['position', 'tokenContext'];
props.pages
.filter(page => getUrlsList(props.navigation).includes(page.url))
}
render() {
- const { query } = this.props;
+ const query = this.props.query.toLowerCase();
const results = this.index
- .search(`${query}~1 ${query}*`)
+ .search(
+ query
+ .split(/\s+/)
+ .map(s => `${s}~1 ${s}*`)
+ .join(' ')
+ )
.map(match => {
const page = this.props.pages.find(page => page.relativeName === match.ref);
const highlights: { [field: string]: [number, number][] } = {};
let longestTerm = '';
+ let exactMatch = false;
- // remember the longest term that matches the query *exactly*
+ // Loop over all matching terms/tokens.
Object.keys(match.matchData.metadata).forEach(term => {
- if (
- query.toLowerCase().includes(term.toLowerCase()) &&
- longestTerm.length < term.length
- ) {
+ // Remember the longest term that matches the query as close as possible.
+ if (query.includes(term.toLowerCase()) && longestTerm.length < term.length) {
longestTerm = term;
}
Object.keys(match.matchData.metadata[term]).forEach(fieldName => {
- const { position: positions } = match.matchData.metadata[term][fieldName];
+ const { position: positions, tokenContext: tokenContexts } = match.matchData.metadata[
+ term
+ ][fieldName];
+
highlights[fieldName] = [...(highlights[fieldName] || []), ...positions];
+
+ // Check if we have an *exact match*.
+ if (!exactMatch && tokenContexts) {
+ tokenContexts.forEach((tokenContext: string) => {
+ if (!exactMatch && tokenContext.includes(query)) {
+ exactMatch = true;
+ }
+ });
+ }
});
});
- return { page, highlights, longestTerm };
+ return { page, highlights, longestTerm, exactMatch };
})
.filter(result => result.page) as SearchResult[];
- // re-order results by the length of the longest matched term
- // the longer term is the more chances the result is more relevant
- const sortedResults = sortBy(results, result => -result.longestTerm.length);
+ // Re-order results by the length of the longest matched term and by exact
+ // match (if applicable). The longer the matched term is, the higher the
+ // chance the result is more relevant.
+ const sortedResults = sortBy(
+ // Sort by longest term.
+ sortBy(results, result => -result.longestTerm.length),
+ // Sort by exact match.
+ result => result.exactMatch && -1
+ );
return (
<>
);
}
}
+
+// Lunr doesn't support exact multiple-term matching. Meaning "foo bar" will not
+// boost a sentence like "Foo bar baz" more than "Baz bar foo". In order to
+// provide more accurate results, we store the token context, to see if we can
+// perform an "exact match". Unfortunately, we cannot extend the search logic,
+// only the tokenizer at *index time*. This is why we store the context as
+// meta-data, and post-process the matches before rendering (see above). For
+// performance reasons, we only add 2 extra tokens, one in front, one after.
+// This means we support "exact matching" for up to 3 terms. More search terms
+// would fall back to the regular matching algorithm, which is OK: the more terms
+// searched for, the better the standard algorithm will perform anyway. In the
+// end, the best would be for Lunr to support multi-term matching, as extending
+// the search algorithm for this would be way too complicated.
+function tokenContextPlugin(builder: LunrBuilder) { // Lunr plugin: tags each token with its neighbouring tokens under the metadata key 'tokenContext'.
+ const pipelineFunction = (token: LunrToken, index: number, tokens: LunrToken[]) => {
+ const prevToken = tokens[index - 1] || ''; // '' when there is no preceding token
+ const nextToken = tokens[index + 1] || ''; // '' when there is no following token
+ token.metadata['tokenContext'] = [prevToken.toString(), token.toString(), nextToken.toString()]
+ .filter(s => s.length) // drop the '' placeholders so the joined string has no stray spaces
+ .join(' ')
+ .toLowerCase(); // stored lower-cased as a single "prev current next" string
+ return token; // same token object is passed on, now carrying the extra metadata
+ };
+
+ (lunr as any).Pipeline.registerFunction(pipelineFunction, 'tokenContext'); // cast: Pipeline is not in the local lunr typings; registers under the label 'tokenContext'
+ builder.pipeline.before((lunr as any).stemmer, pipelineFunction); // insert it ahead of lunr.stemmer in the builder's pipeline
+ builder.metadataWhitelist.push('tokenContext'); // add the key to the builder's metadata whitelist
+}
state: State = { query: '' };
handleSearch = (query: string) => {
- this.setState({ query });
+ this.setState({ query: query.trim() });
};
render() {
{
ref: 'lorem/origin',
matchData: {
- metadata: { from: { title: { position: [[19, 5]] }, text: { position: [[121, 4]] } } }
+ metadata: {
+ simply: {
+ title: { position: [[19, 5]] },
+ text: {
+ position: [[15, 6], [28, 4]],
+ tokenContext: ['is simply dummy', 'simply dummy text']
+ }
+ }
+ }
}
},
- { ref: 'foobar', matchData: { metadata: { from: { title: { position: [[23, 4]] } } } } }
+ {
+ ref: 'foobar',
+ matchData: {
+ metadata: {
+ simply: {
+ title: { position: [[23, 4]] },
+ text: { position: [[111, 6], [118, 4]], tokenContext: ['keywords simply text'] }
+ }
+ }
+ }
+ }
])
}))
}));
createPage(
'Where does Foobar come from?',
'foobar',
- 'Foobar is a universal variable understood to represent whatever is being discussed.'
+ 'Foobar is a universal variable understood to represent whatever is being discussed. Now we need some keywords: simply text.'
)
];
<SearchResults
navigation={['lorem/index', 'lorem/origin', 'foobar']}
pages={pages}
- query="from"
+ query="simply text"
splat="foobar"
/>
);
expect(wrapper).toMatchSnapshot();
expect(lunr).toBeCalled();
- expect((wrapper.instance() as SearchResults).index.search).toBeCalledWith('from~1 from*');
+ expect((wrapper.instance() as SearchResults).index.search).toBeCalledWith(
+ 'simply~1 simply* text~1 text*'
+ );
});
exports[`should search 1`] = `
<Fragment>
<SearchResultEntry
- active={false}
- key="lorem/origin"
+ active={true}
+ key="foobar"
result={
Object {
+ "exactMatch": true,
"highlights": Object {
"text": Array [
Array [
- 121,
+ 111,
+ 6,
+ ],
+ Array [
+ 118,
4,
],
],
"title": Array [
Array [
- 19,
- 5,
+ 23,
+ 4,
],
],
},
- "longestTerm": "from",
+ "longestTerm": "simply",
"page": Object {
- "content": "Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words.",
+ "content": "Foobar is a universal variable understood to represent whatever is being discussed. Now we need some keywords: simply text.",
"navTitle": undefined,
- "relativeName": "lorem/origin",
- "text": "Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words.",
- "title": "Where does it come from?",
- "url": "/lorem/origin",
+ "relativeName": "foobar",
+ "text": "Foobar is a universal variable understood to represent whatever is being discussed. Now we need some keywords: simply text.",
+ "title": "Where does Foobar come from?",
+ "url": "/foobar",
},
}
}
/>
<SearchResultEntry
- active={true}
- key="foobar"
+ active={false}
+ key="lorem/origin"
result={
Object {
+ "exactMatch": false,
"highlights": Object {
- "title": Array [
+ "text": Array [
Array [
- 23,
+ 15,
+ 6,
+ ],
+ Array [
+ 28,
4,
],
],
+ "title": Array [
+ Array [
+ 19,
+ 5,
+ ],
+ ],
},
- "longestTerm": "from",
+ "longestTerm": "simply",
"page": Object {
- "content": "Foobar is a universal variable understood to represent whatever is being discussed.",
+ "content": "Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words.",
"navTitle": undefined,
- "relativeName": "foobar",
- "text": "Foobar is a universal variable understood to represent whatever is being discussed.",
- "title": "Where does Foobar come from?",
- "url": "/foobar",
+ "relativeName": "lorem/origin",
+ "text": "Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words.",
+ "title": "Where does it come from?",
+ "url": "/lorem/origin",
},
}
}