summaryrefslogtreecommitdiffstats
path: root/lib/public/FullTextSearch/IFullTextSearchProvider.php
blob: c9e4e44d93132361431058e3926d946a91f841ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
<?php

declare(strict_types=1);

/**
 * @copyright 2018
 *
 * @author Maxence Lange <maxence@artificial-owl.com>
 * @author Roeland Jago Douma <roeland@famdouma.nl>
 *
 * @license GNU AGPL version 3 or any later version
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
namespace OCP\FullTextSearch;

use OCP\FullTextSearch\Model\IIndex;
use OCP\FullTextSearch\Model\IIndexDocument;
use OCP\FullTextSearch\Model\IIndexOptions;
use OCP\FullTextSearch\Model\IRunner;
use OCP\FullTextSearch\Model\ISearchRequest;
use OCP\FullTextSearch\Model\ISearchResult;
use OCP\FullTextSearch\Model\ISearchTemplate;

/**
 * Interface IFullTextSearchProvider
 *
 * This interface must be used when creating a Content Provider for FullTextSearch.
 *
 * A Content Provider is an extension to the FullTextSearch that will extract and
 * provide content to the FullTextSearch.
 *
 * There is no limit to the number of Content Providers that can be integrated
 * into FullTextSearch. Each Content Provider corresponds to a type of content
 * available in Nextcloud (files, bookmarks, notes, deck cards, mails, ...)
 *
 * Content is split into documents, each identified by an ID and the ID of the
 * Content Provider. The content is indexed by a Search Platform that will
 * return a documentId as the result of a search request.
 *
 *
 * To oversimplify the mechanism:
 *
 * - When indexing, FullTextSearch will ask for documents to every Content Provider.
 * - On search, results from the Search Platform, identified by documentId, will
 *   be improved by the corresponding Content Provider.
 *
 *
 * The Content Provider is a PHP class that implements this interface and is
 * defined in appinfo/info.xml of the app that contains that class:
 *
 *    <fulltextsearch>
 *      <provider>OCA\YourApp\YourContentProvider</provider>
 *    </fulltextsearch>
 *
 * Multiple Content Providers can be defined in a single app.
 *
 * @since 15.0.0
 *
 */
interface IFullTextSearchProvider {
	/**
	 * Must return a unique Id used to identify the Content Provider.
	 * The Id must contain only alphanumeric characters, with no spaces.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	public function getId(): string;


	/**
	 * Must return a descriptive name of the Content Provider.
	 * This name is used in multiple places, so prefer a clear display name.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	public function getName(): string;


	/**
	 * Should return the current configuration of the Content Provider.
	 * This is used to display the configuration when running the
	 * ./occ fulltextsearch:check command.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	public function getConfiguration(): array;


	/**
	 * Must return an ISearchTemplate that contains the displayable items and
	 * the options available to users when searching.
	 *
	 * @see ISearchTemplate
	 *
	 * @since 15.0.0
	 *
	 * @return ISearchTemplate
	 */
	public function getSearchTemplate(): ISearchTemplate;


	/**
	 * Called when FullTextSearch is loading your Content Provider.
	 *
	 * @since 15.0.0
	 */
	public function loadProvider();


	/**
	 * Sets the wrapper of the currently executed process.
	 * Because the index process can be long and heavy, and because errors can
	 * be encountered during the process, the IRunner is a wrapper that allows
	 * the Content Provider to communicate with the process initiated by
	 * FullTextSearch.
	 *
	 * The IRunner comes with some methods so the Content Provider can
	 * return important information and errors to be displayed to the admin.
	 *
	 * @since 15.0.0
	 *
	 * @param IRunner $runner
	 */
	public function setRunner(IRunner $runner);


	/**
	 * This method is called when the administrator specifies options when
	 * running ./occ fulltextsearch:index or ./occ fulltextsearch:live
	 *
	 * @since 15.0.0
	 *
	 * @param IIndexOptions $options
	 */
	public function setIndexOptions(IIndexOptions $options);


	/**
	 * Allows the provider to generate a list of chunks in order to split a
	 * huge list of indexable documents.
	 *
	 * During the indexing, the generateIndexableDocuments() method will be
	 * called for each entry of the returned array.
	 * If the returned array is empty, generateIndexableDocuments() will be
	 * called only once (per user).
	 *
	 * @since 16.0.0
	 *
	 * @param string $userId
	 *
	 * @return string[]
	 */
	public function generateChunks(string $userId): array;


	/**
	 * Returns all indexable documents for a user as an array of IIndexDocument.
	 *
	 * There is no need to fill each IIndexDocument with content; at this point,
	 * only fill the object with the minimum information needed to identify the
	 * document it refers to, so that memory is not wasted.
	 *
	 * FullTextSearch will call 2 other methods of this interface for each
	 * IIndexDocument of the array, prior to their indexing:
	 *
	 * - first, to compare the date of the last index,
	 * - then, to fill each IIndexDocument with complete data
	 *
	 * @see IIndexDocument
	 *
	 * @since 15.0.0
	 *  -> 16.0.0: the parameter "$chunk" was added
	 *
	 * @param string $userId
	 * @param string $chunk
	 *
	 * @return IIndexDocument[]
	 */
	public function generateIndexableDocuments(string $userId, string $chunk): array;


	/**
	 * Called to check whether the document is already indexed and whether
	 * that existing index is still up-to-date, using the IIndex from
	 * IIndexDocument->getIndex()
	 *
	 * Returning true means the current IIndexDocument is not queued for any
	 * further operation; processing continues with the next element of the
	 * list returned by generateIndexableDocuments().
	 *
	 * @since 15.0.0
	 *
	 * @param IIndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate(IIndexDocument $document): bool;


	/**
	 * Must fill the IIndexDocument with all information related to the
	 * document, before its indexing by the Search Platform.
	 *
	 * This method is called for each element previously returned by
	 * generateIndexableDocuments().
	 *
	 * @see IIndexDocument
	 *
	 * @since 15.0.0
	 *
	 * @param IIndexDocument $document
	 */
	public function fillIndexDocument(IIndexDocument $document);


	/**
	 * The provider must create and return an IIndexDocument based on the
	 * IIndex and its status. The IIndexDocument must contain all
	 * information, as it will be sent for indexing.
	 *
	 * This method is called during a cron or a ./occ fulltextsearch:live after
	 * a new document is created, or an old document is set as modified.
	 *
	 * @since 15.0.0
	 *
	 * @param IIndex $index
	 *
	 * @return IIndexDocument
	 */
	public function updateDocument(IIndex $index): IIndexDocument;


	/**
	 * Called when an index is initiated by the administrator.
	 * This should only be used in case a specific mapping is needed.
	 * (i.e. _almost_ never)
	 *
	 * @since 15.0.0
	 *
	 * @param IFullTextSearchPlatform $platform
	 */
	public function onInitializingIndex(IFullTextSearchPlatform $platform);


	/**
	 * Called when the administrator is resetting the index.
	 * This should only be used in case a specific mapping has been
	 * created.
	 *
	 * @since 15.0.0
	 *
	 * @param IFullTextSearchPlatform $platform
	 */
	public function onResettingIndex(IFullTextSearchPlatform $platform);


	/**
	 * This method is called when a search request is initiated by a user,
	 * prior to being sent to the Search Platform.
	 *
	 * Your Content Provider can interact with the ISearchRequest to apply the
	 * search options and make the search more precise.
	 *
	 * @see ISearchRequest
	 *
	 * @since 15.0.0
	 *
	 * @param ISearchRequest $searchRequest
	 */
	public function improveSearchRequest(ISearchRequest $searchRequest);


	/**
	 * This method is called after the results of a search are returned by the
	 * Search Platform.
	 *
	 * Your Content Provider can detail each entry with local data to improve
	 * the display of the search result.
	 *
	 * @see ISearchResult
	 *
	 * @since 15.0.0
	 *
	 * @param ISearchResult $searchResult
	 */
	public function improveSearchResult(ISearchResult $searchResult);


	/**
	 * Not used yet.
	 *
	 * @since 15.0.0
	 */
	public function unloadProvider();
}