You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

document.go 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mapping
import (
	"encoding"
	"encoding/json"
	"fmt"
	"reflect"
	"sort"
	"time"

	"github.com/blevesearch/bleve/registry"
)
  23. // A DocumentMapping describes how a type of document
  24. // should be indexed.
  25. // As documents can be hierarchical, named sub-sections
  26. // of documents are mapped using the same structure in
  27. // the Properties field.
  28. // Each value inside a document can be indexed 0 or more
  29. // ways. These index entries are called fields and
  30. // are stored in the Fields field.
  31. // Entire sections of a document can be ignored or
  32. // excluded by setting Enabled to false.
  33. // If not explicitly mapped, default mapping operations
  34. // are used. To disable this automatic handling, set
  35. // Dynamic to false.
  36. type DocumentMapping struct {
  37. Enabled bool `json:"enabled"`
  38. Dynamic bool `json:"dynamic"`
  39. Properties map[string]*DocumentMapping `json:"properties,omitempty"`
  40. Fields []*FieldMapping `json:"fields,omitempty"`
  41. DefaultAnalyzer string `json:"default_analyzer"`
  42. // StructTagKey overrides "json" when looking for field names in struct tags
  43. StructTagKey string `json:"struct_tag_key,omitempty"`
  44. }
  45. func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
  46. var err error
  47. if dm.DefaultAnalyzer != "" {
  48. _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
  49. if err != nil {
  50. return err
  51. }
  52. }
  53. for _, property := range dm.Properties {
  54. err = property.Validate(cache)
  55. if err != nil {
  56. return err
  57. }
  58. }
  59. for _, field := range dm.Fields {
  60. if field.Analyzer != "" {
  61. _, err = cache.AnalyzerNamed(field.Analyzer)
  62. if err != nil {
  63. return err
  64. }
  65. }
  66. if field.DateFormat != "" {
  67. _, err = cache.DateTimeParserNamed(field.DateFormat)
  68. if err != nil {
  69. return err
  70. }
  71. }
  72. switch field.Type {
  73. case "text", "datetime", "number", "boolean", "geopoint":
  74. default:
  75. return fmt.Errorf("unknown field type: '%s'", field.Type)
  76. }
  77. }
  78. return nil
  79. }
  80. // analyzerNameForPath attempts to first find the field
  81. // described by this path, then returns the analyzer
  82. // configured for that field
  83. func (dm *DocumentMapping) analyzerNameForPath(path string) string {
  84. field := dm.fieldDescribedByPath(path)
  85. if field != nil {
  86. return field.Analyzer
  87. }
  88. return ""
  89. }
  90. func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
  91. pathElements := decodePath(path)
  92. if len(pathElements) > 1 {
  93. // easy case, there is more than 1 path element remaining
  94. // the next path element must match a property name
  95. // at this level
  96. for propName, subDocMapping := range dm.Properties {
  97. if propName == pathElements[0] {
  98. return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
  99. }
  100. }
  101. } else {
  102. // just 1 path elememnt
  103. // first look for property name with empty field
  104. for propName, subDocMapping := range dm.Properties {
  105. if propName == pathElements[0] {
  106. // found property name match, now look at its fields
  107. for _, field := range subDocMapping.Fields {
  108. if field.Name == "" || field.Name == pathElements[0] {
  109. // match
  110. return field
  111. }
  112. }
  113. }
  114. }
  115. // next, walk the properties again, looking for field overriding the name
  116. for propName, subDocMapping := range dm.Properties {
  117. if propName != pathElements[0] {
  118. // property name isn't a match, but field name could override it
  119. for _, field := range subDocMapping.Fields {
  120. if field.Name == pathElements[0] {
  121. return field
  122. }
  123. }
  124. }
  125. }
  126. }
  127. return nil
  128. }
  129. // documentMappingForPath only returns EXACT matches for a sub document
  130. // or for an explicitly mapped field, if you want to find the
  131. // closest document mapping to a field not explicitly mapped
  132. // use closestDocMapping
  133. func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
  134. pathElements := decodePath(path)
  135. current := dm
  136. OUTER:
  137. for i, pathElement := range pathElements {
  138. for name, subDocMapping := range current.Properties {
  139. if name == pathElement {
  140. current = subDocMapping
  141. continue OUTER
  142. }
  143. }
  144. // no subDocMapping matches this pathElement
  145. // only if this is the last element check for field name
  146. if i == len(pathElements)-1 {
  147. for _, field := range current.Fields {
  148. if field.Name == pathElement {
  149. break
  150. }
  151. }
  152. }
  153. return nil
  154. }
  155. return current
  156. }
  157. // closestDocMapping findest the most specific document mapping that matches
  158. // part of the provided path
  159. func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
  160. pathElements := decodePath(path)
  161. current := dm
  162. OUTER:
  163. for _, pathElement := range pathElements {
  164. for name, subDocMapping := range current.Properties {
  165. if name == pathElement {
  166. current = subDocMapping
  167. continue OUTER
  168. }
  169. }
  170. }
  171. return current
  172. }
  173. // NewDocumentMapping returns a new document mapping
  174. // with all the default values.
  175. func NewDocumentMapping() *DocumentMapping {
  176. return &DocumentMapping{
  177. Enabled: true,
  178. Dynamic: true,
  179. }
  180. }
  181. // NewDocumentStaticMapping returns a new document
  182. // mapping that will not automatically index parts
  183. // of a document without an explicit mapping.
  184. func NewDocumentStaticMapping() *DocumentMapping {
  185. return &DocumentMapping{
  186. Enabled: true,
  187. }
  188. }
  189. // NewDocumentDisabledMapping returns a new document
  190. // mapping that will not perform any indexing.
  191. func NewDocumentDisabledMapping() *DocumentMapping {
  192. return &DocumentMapping{}
  193. }
  194. // AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
  195. // for the specified named subsection.
  196. func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
  197. if dm.Properties == nil {
  198. dm.Properties = make(map[string]*DocumentMapping)
  199. }
  200. dm.Properties[property] = sdm
  201. }
  202. // AddFieldMappingsAt adds one or more FieldMappings
  203. // at the named sub-document. If the named sub-document
  204. // doesn't yet exist it is created for you.
  205. // This is a convenience function to make most common
  206. // mappings more concise.
  207. // Otherwise, you would:
  208. // subMapping := NewDocumentMapping()
  209. // subMapping.AddFieldMapping(fieldMapping)
  210. // parentMapping.AddSubDocumentMapping(property, subMapping)
  211. func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
  212. if dm.Properties == nil {
  213. dm.Properties = make(map[string]*DocumentMapping)
  214. }
  215. sdm, ok := dm.Properties[property]
  216. if !ok {
  217. sdm = NewDocumentMapping()
  218. }
  219. for _, fm := range fms {
  220. sdm.AddFieldMapping(fm)
  221. }
  222. dm.Properties[property] = sdm
  223. }
  224. // AddFieldMapping adds the provided FieldMapping for this section
  225. // of the document.
  226. func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
  227. if dm.Fields == nil {
  228. dm.Fields = make([]*FieldMapping, 0)
  229. }
  230. dm.Fields = append(dm.Fields, fm)
  231. }
  232. // UnmarshalJSON offers custom unmarshaling with optional strict validation
  233. func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
  234. var tmp map[string]json.RawMessage
  235. err := json.Unmarshal(data, &tmp)
  236. if err != nil {
  237. return err
  238. }
  239. // set defaults for fields which might have been omitted
  240. dm.Enabled = true
  241. dm.Dynamic = true
  242. var invalidKeys []string
  243. for k, v := range tmp {
  244. switch k {
  245. case "enabled":
  246. err := json.Unmarshal(v, &dm.Enabled)
  247. if err != nil {
  248. return err
  249. }
  250. case "dynamic":
  251. err := json.Unmarshal(v, &dm.Dynamic)
  252. if err != nil {
  253. return err
  254. }
  255. case "default_analyzer":
  256. err := json.Unmarshal(v, &dm.DefaultAnalyzer)
  257. if err != nil {
  258. return err
  259. }
  260. case "properties":
  261. err := json.Unmarshal(v, &dm.Properties)
  262. if err != nil {
  263. return err
  264. }
  265. case "fields":
  266. err := json.Unmarshal(v, &dm.Fields)
  267. if err != nil {
  268. return err
  269. }
  270. case "struct_tag_key":
  271. err := json.Unmarshal(v, &dm.StructTagKey)
  272. if err != nil {
  273. return err
  274. }
  275. default:
  276. invalidKeys = append(invalidKeys, k)
  277. }
  278. }
  279. if MappingJSONStrict && len(invalidKeys) > 0 {
  280. return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
  281. }
  282. return nil
  283. }
  284. func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
  285. rv := ""
  286. current := dm
  287. for _, pathElement := range path {
  288. var ok bool
  289. current, ok = current.Properties[pathElement]
  290. if !ok {
  291. break
  292. }
  293. if current.DefaultAnalyzer != "" {
  294. rv = current.DefaultAnalyzer
  295. }
  296. }
  297. return rv
  298. }
  299. func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
  300. // allow default "json" tag to be overriden
  301. structTagKey := dm.StructTagKey
  302. if structTagKey == "" {
  303. structTagKey = "json"
  304. }
  305. val := reflect.ValueOf(data)
  306. typ := val.Type()
  307. switch typ.Kind() {
  308. case reflect.Map:
  309. // FIXME can add support for other map keys in the future
  310. if typ.Key().Kind() == reflect.String {
  311. for _, key := range val.MapKeys() {
  312. fieldName := key.String()
  313. fieldVal := val.MapIndex(key).Interface()
  314. dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
  315. }
  316. }
  317. case reflect.Struct:
  318. for i := 0; i < val.NumField(); i++ {
  319. field := typ.Field(i)
  320. fieldName := field.Name
  321. // anonymous fields of type struct can elide the type name
  322. if field.Anonymous && field.Type.Kind() == reflect.Struct {
  323. fieldName = ""
  324. }
  325. // if the field has a name under the specified tag, prefer that
  326. tag := field.Tag.Get(structTagKey)
  327. tagFieldName := parseTagName(tag)
  328. if tagFieldName == "-" {
  329. continue
  330. }
  331. // allow tag to set field name to empty, only if anonymous
  332. if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
  333. fieldName = tagFieldName
  334. }
  335. if val.Field(i).CanInterface() {
  336. fieldVal := val.Field(i).Interface()
  337. newpath := path
  338. if fieldName != "" {
  339. newpath = append(path, fieldName)
  340. }
  341. dm.processProperty(fieldVal, newpath, indexes, context)
  342. }
  343. }
  344. case reflect.Slice, reflect.Array:
  345. for i := 0; i < val.Len(); i++ {
  346. if val.Index(i).CanInterface() {
  347. fieldVal := val.Index(i).Interface()
  348. dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
  349. }
  350. }
  351. case reflect.Ptr:
  352. ptrElem := val.Elem()
  353. if ptrElem.IsValid() && ptrElem.CanInterface() {
  354. dm.processProperty(ptrElem.Interface(), path, indexes, context)
  355. }
  356. case reflect.String:
  357. dm.processProperty(val.String(), path, indexes, context)
  358. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  359. dm.processProperty(float64(val.Int()), path, indexes, context)
  360. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  361. dm.processProperty(float64(val.Uint()), path, indexes, context)
  362. case reflect.Float32, reflect.Float64:
  363. dm.processProperty(float64(val.Float()), path, indexes, context)
  364. case reflect.Bool:
  365. dm.processProperty(val.Bool(), path, indexes, context)
  366. }
  367. }
  368. func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
  369. pathString := encodePath(path)
  370. // look to see if there is a mapping for this field
  371. subDocMapping := dm.documentMappingForPath(pathString)
  372. closestDocMapping := dm.closestDocMapping(pathString)
  373. // check to see if we even need to do further processing
  374. if subDocMapping != nil && !subDocMapping.Enabled {
  375. return
  376. }
  377. propertyValue := reflect.ValueOf(property)
  378. if !propertyValue.IsValid() {
  379. // cannot do anything with the zero value
  380. return
  381. }
  382. propertyType := propertyValue.Type()
  383. switch propertyType.Kind() {
  384. case reflect.String:
  385. propertyValueString := propertyValue.String()
  386. if subDocMapping != nil {
  387. // index by explicit mapping
  388. for _, fieldMapping := range subDocMapping.Fields {
  389. fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
  390. }
  391. } else if closestDocMapping.Dynamic {
  392. // automatic indexing behavior
  393. // first see if it can be parsed by the default date parser
  394. dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
  395. if dateTimeParser != nil {
  396. parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
  397. if err != nil {
  398. // index as text
  399. fieldMapping := newTextFieldMappingDynamic(context.im)
  400. fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
  401. } else {
  402. // index as datetime
  403. fieldMapping := newDateTimeFieldMappingDynamic(context.im)
  404. fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
  405. }
  406. }
  407. }
  408. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  409. dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
  410. return
  411. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  412. dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
  413. return
  414. case reflect.Float64, reflect.Float32:
  415. propertyValFloat := propertyValue.Float()
  416. if subDocMapping != nil {
  417. // index by explicit mapping
  418. for _, fieldMapping := range subDocMapping.Fields {
  419. fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
  420. }
  421. } else if closestDocMapping.Dynamic {
  422. // automatic indexing behavior
  423. fieldMapping := newNumericFieldMappingDynamic(context.im)
  424. fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
  425. }
  426. case reflect.Bool:
  427. propertyValBool := propertyValue.Bool()
  428. if subDocMapping != nil {
  429. // index by explicit mapping
  430. for _, fieldMapping := range subDocMapping.Fields {
  431. fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
  432. }
  433. } else if closestDocMapping.Dynamic {
  434. // automatic indexing behavior
  435. fieldMapping := newBooleanFieldMappingDynamic(context.im)
  436. fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
  437. }
  438. case reflect.Struct:
  439. switch property := property.(type) {
  440. case time.Time:
  441. // don't descend into the time struct
  442. if subDocMapping != nil {
  443. // index by explicit mapping
  444. for _, fieldMapping := range subDocMapping.Fields {
  445. fieldMapping.processTime(property, pathString, path, indexes, context)
  446. }
  447. } else if closestDocMapping.Dynamic {
  448. fieldMapping := newDateTimeFieldMappingDynamic(context.im)
  449. fieldMapping.processTime(property, pathString, path, indexes, context)
  450. }
  451. case encoding.TextMarshaler:
  452. txt, err := property.MarshalText()
  453. if err == nil && subDocMapping != nil {
  454. // index by explicit mapping
  455. for _, fieldMapping := range subDocMapping.Fields {
  456. if fieldMapping.Type == "text" {
  457. fieldMapping.processString(string(txt), pathString, path, indexes, context)
  458. }
  459. }
  460. }
  461. dm.walkDocument(property, path, indexes, context)
  462. default:
  463. if subDocMapping != nil {
  464. for _, fieldMapping := range subDocMapping.Fields {
  465. if fieldMapping.Type == "geopoint" {
  466. fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
  467. }
  468. }
  469. }
  470. dm.walkDocument(property, path, indexes, context)
  471. }
  472. case reflect.Map:
  473. if subDocMapping != nil {
  474. for _, fieldMapping := range subDocMapping.Fields {
  475. if fieldMapping.Type == "geopoint" {
  476. fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
  477. }
  478. }
  479. }
  480. dm.walkDocument(property, path, indexes, context)
  481. case reflect.Ptr:
  482. if !propertyValue.IsNil() {
  483. switch property := property.(type) {
  484. case encoding.TextMarshaler:
  485. txt, err := property.MarshalText()
  486. if err == nil && subDocMapping != nil {
  487. // index by explicit mapping
  488. for _, fieldMapping := range subDocMapping.Fields {
  489. if fieldMapping.Type == "text" {
  490. fieldMapping.processString(string(txt), pathString, path, indexes, context)
  491. }
  492. }
  493. } else {
  494. dm.walkDocument(property, path, indexes, context)
  495. }
  496. default:
  497. dm.walkDocument(property, path, indexes, context)
  498. }
  499. }
  500. default:
  501. dm.walkDocument(property, path, indexes, context)
  502. }
  503. }