From d83df944b1fc0f266444c31852c7730f0f41db87 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Sun, 9 Oct 2016 10:53:54 -0400 Subject: [PATCH] implement CQL to OGC filter transforms --- pycsw/ogc/csw/cql.py | 113 ++++++++++++++++++ pycsw/ogc/csw/csw2.py | 41 +++++-- pycsw/ogc/csw/csw3.py | 41 +++++-- ...etRecords-filter-cql-title-or-abstract.xml | 23 ++++ ...efault_get_GetRecords-filter-cql-title.xml | 23 ++++ ...post_GetRecords-cql-title-and-abstract.xml | 12 ++ tests/suites/default/get/requests.txt | 2 + .../GetRecords-cql-title-and-abstract.xml | 9 ++ 8 files changed, 242 insertions(+), 22 deletions(-) create mode 100644 pycsw/ogc/csw/cql.py create mode 100644 tests/expected/suites_default_get_GetRecords-filter-cql-title-or-abstract.xml create mode 100644 tests/expected/suites_default_get_GetRecords-filter-cql-title.xml create mode 100644 tests/expected/suites_default_post_GetRecords-cql-title-and-abstract.xml create mode 100644 tests/suites/default/post/GetRecords-cql-title-and-abstract.xml diff --git a/pycsw/ogc/csw/cql.py b/pycsw/ogc/csw/cql.py new file mode 100644 index 000000000..a0e8d40ca --- /dev/null +++ b/pycsw/ogc/csw/cql.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2016 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# =================================================================
+
+import logging
+import re
+
+from pycsw.core.etree import etree
+from pycsw.core import util
+from pycsw.ogc.fes.fes1 import MODEL as fes1_model
+
+LOGGER = logging.getLogger(__name__)
+
+# Matches either a quoted literal or an AND/OR keyword surrounded by
+# whitespace.  The quoted alternatives come first so that a keyword inside
+# a literal is consumed with the literal and never seen as an operator.
+_LOGICAL_OP = re.compile(r"""'[^']*'|"[^"]*"|\s+(and|or)\s+""", re.IGNORECASE)
+
+
+def cql2fes1(cql, namespaces):
+    """transforms Common Query Language (CQL) query into OGC fes1 syntax
+
+    :param cql: CQL text: a single condition, or several conditions joined
+                by one logical keyword (AND or OR, in any letter case)
+    :param namespaces: namespaces handed to util.nspath_eval to resolve
+                       the prefixed element names
+
+    :returns: ogc:Filter element holding the condition(s); multiple
+              conditions are wrapped in a single ogc:And / ogc:Or element
+
+    :raises ValueError: if the expression is empty, mixes AND with OR, or
+                        contains a condition that cannot be parsed
+    """
+
+    LOGGER.debug('CQL: %s', cql)
+
+    if not cql or not cql.strip():
+        raise ValueError('Empty CQL expression')
+
+    logical_op_name, conditions = _split_conditions(cql)
+
+    # parse everything first so an invalid condition fails before any
+    # XML is built
+    filters = [_parse_condition(c) for c in conditions]
+
+    root = etree.Element(util.nspath_eval('ogc:Filter', namespaces))
+
+    # conditions hang off the logical operator when there is one,
+    # otherwise directly off the filter root
+    parent = root
+    if logical_op_name is not None:
+        LOGGER.debug('Logical operator found (AND/OR)')
+        parent = etree.SubElement(
+            root, util.nspath_eval(logical_op_name, namespaces))
+
+    for predicate, property_name, literal in filters:
+        condition = etree.SubElement(
+            parent, util.nspath_eval(predicate, namespaces))
+
+        etree.SubElement(
+            condition,
+            util.nspath_eval('ogc:PropertyName', namespaces)).text = property_name
+
+        etree.SubElement(
+            condition,
+            util.nspath_eval('ogc:Literal', namespaces)).text = literal
+
+    LOGGER.debug('Resulting OGC Filter: %s',
+                 etree.tostring(root, pretty_print=1))
+
+    return root
+
+
+def _split_conditions(cql):
+    """splits a CQL expression on AND/OR keywords found outside quotes
+
+    :param cql: CQL text
+
+    :returns: tuple of (logical element name, list of condition strings);
+              the element name is 'ogc:Or', 'ogc:And', or None when the
+              expression holds a single condition
+
+    :raises ValueError: if both AND and OR occur in the expression
+    """
+
+    keywords = set()
+    conditions = []
+    start = 0
+
+    for match in _LOGICAL_OP.finditer(cql):
+        keyword = match.group(1)
+        if keyword is None:
+            # quoted literal: skip it, never split inside it
+            continue
+        keywords.add(keyword.lower())
+        conditions.append(cql[start:match.start()])
+        start = match.end()
+
+    conditions.append(cql[start:])
+
+    if len(keywords) > 1:
+        # no precedence or grouping support: refuse instead of guessing
+        raise ValueError('Mixing AND and OR in CQL is not supported')
+
+    if not keywords:
+        return None, conditions
+
+    return ('ogc:Or' if 'or' in keywords else 'ogc:And'), conditions
+
+
+def _parse_condition(condition):
+    """parses a single condition
+
+    :param condition: text of the form '<property> <operator> <literal>';
+                      the literal may be quoted and may contain whitespace
+
+    :returns: tuple of (fes1 predicate name, property name, literal)
+
+    :raises ValueError: if the condition does not have three parts or the
+                        operator is not a known fes1 comparison operator
+    """
+
+    LOGGER.debug('condition: %s', condition)
+
+    # split at most twice so whitespace inside the literal is preserved
+    parts = condition.split(None, 2)
+    if len(parts) != 3:
+        raise ValueError('Invalid CQL condition: %s' % condition)
+
+    property_name, operator, literal = parts
+
+    # drop only the enclosing quotes; quotes inside the literal are data
+    literal = literal.strip().strip('"\'')
+
+    # accept the operator as typed or in lower case, mirroring the
+    # case-insensitive handling of AND/OR
+    candidates = (operator, operator.lower())
+
+    fes1_predicate = None
+    for k, v in fes1_model['ComparisonOperators'].items():
+        if v['opvalue'] in candidates:
+            fes1_predicate = k
+
+    if fes1_predicate is None:
+        raise ValueError('Unsupported CQL operator: %s' % operator)
+
+    LOGGER.debug('parsed condition: %s %s %s', property_name, fes1_predicate,
+                 literal)
+
+    return (fes1_predicate, property_name, literal)
diff --git a/pycsw/ogc/csw/csw2.py b/pycsw/ogc/csw/csw2.py
index 4893ae6b8..112a11812 100644
--- a/pycsw/ogc/csw/csw2.py
+++ b/pycsw/ogc/csw/csw2.py
@@ -38,6 +38,7 @@ from six.moves.configparser import SafeConfigParser
 
 from pycsw.core.etree import etree
 from pycsw import oaipmh, opensearch, sru
+from pycsw.ogc.csw.cql import cql2fes1
 from pycsw.plugins.profiles import profile as pprofile
 import pycsw.plugins.outputschemas
 from pycsw.core import config, log, metadata, util
@@ -730,12 +731,20 @@ def getrecords(self):
                 % self.parent.kvp['constraintlanguage'])
             if self.parent.kvp['constraintlanguage'] == 'CQL_TEXT':
                 tmp = self.parent.kvp['constraint']
-                self.parent.kvp['constraint'] = {}
-                self.parent.kvp['constraint']['type'] = 'cql'
-                self.parent.kvp['constraint']['where'] = \
-                self.parent._cql_update_queryables_mappings(tmp,
-                self.parent.repository.queryables['_all'])
-                self.parent.kvp['constraint']['values'] = {}
+                try:
+                    LOGGER.debug('Transforming CQL into fes1')
+                    LOGGER.debug('CQL: %s', tmp)
+                    self.parent.kvp['constraint'] = {}
+                    self.parent.kvp['constraint']['type'] = 'filter'
+                    cql = cql2fes1(tmp, self.parent.context.namespaces)
+                    self.parent.kvp['constraint']['where'], self.parent.kvp['constraint']['values'] = fes1.parse(cql,
+                    self.parent.repository.queryables['_all'],
self.parent.repository.dbtype, + self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) + except Exception as err: + LOGGER.error('Invalid CQL query %s', tmp) + LOGGER.error('Error message: %s', err, exc_info=True) + return self.exceptionreport('InvalidParameterValue', + 'constraint', 'Invalid Filter syntax') elif self.parent.kvp['constraintlanguage'] == 'FILTER': # validate filter XML try: @@ -812,8 +821,10 @@ def getrecords(self): maxrecords=self.parent.kvp['maxrecords'], startposition=int(self.parent.kvp['startposition'])-1) except Exception as err: + LOGGER.debug('Invalid query syntax. Query: %s', self.parent.kvp['constraint']) + LOGGER.debug('Invalid query syntax. Result: %s', err) return self.exceptionreport('InvalidParameterValue', 'constraint', - 'Invalid query: %s' % err) + 'Invalid query syntax') dsresults = [] @@ -1530,13 +1541,21 @@ def _parse_constraint(self, element): self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) except Exception as err: return 'Invalid Filter request: %s' % err + tmp = element.find(util.nspath_eval('csw:CqlText', self.parent.context.namespaces)) if tmp is not None: LOGGER.debug('CQL specified: %s.', tmp.text) - query['type'] = 'cql' - query['where'] = self.parent._cql_update_queryables_mappings(tmp.text, - self.parent.repository.queryables['_all']) - query['values'] = {} + try: + LOGGER.debug('Transforming CQL into OGC Filter') + query['type'] = 'filter' + cql = cql2fes1(tmp.text, self.parent.context.namespaces) + query['where'], query['values'] = fes1.parse(cql, + self.parent.repository.queryables['_all'], self.parent.repository.dbtype, + self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) + except Exception as err: + LOGGER.error('Invalid CQL request: %s', tmp.text) + LOGGER.error('Error message: %s', err, exc_info=True) + return 'Invalid CQL request' return query 
def parse_postdata(self, postdata): diff --git a/pycsw/ogc/csw/csw3.py b/pycsw/ogc/csw/csw3.py index 9956c4fe8..a09718296 100644 --- a/pycsw/ogc/csw/csw3.py +++ b/pycsw/ogc/csw/csw3.py @@ -36,6 +36,7 @@ from six import StringIO from six.moves.configparser import SafeConfigParser from pycsw.core.etree import etree +from pycsw.ogc.csw.cql import cql2fes1 from pycsw import oaipmh, opensearch, sru from pycsw.plugins.profiles import profile as pprofile import pycsw.plugins.outputschemas @@ -758,12 +759,20 @@ def getrecords(self): % self.parent.kvp['constraintlanguage']) if self.parent.kvp['constraintlanguage'] == 'CQL_TEXT': tmp = self.parent.kvp['constraint'] - self.parent.kvp['constraint'] = {} - self.parent.kvp['constraint']['type'] = 'cql' - self.parent.kvp['constraint']['where'] = \ - self.parent._cql_update_queryables_mappings(tmp, - self.parent.repository.queryables['_all']) - self.parent.kvp['constraint']['values'] = {} + try: + LOGGER.debug('Transforming CQL into fes1') + LOGGER.debug('CQL: %s', tmp) + self.parent.kvp['constraint'] = {} + self.parent.kvp['constraint']['type'] = 'filter' + cql = cql2fes1(tmp, self.parent.context.namespaces) + self.parent.kvp['constraint']['where'], self.parent.kvp['constraint']['values'] = fes1.parse(cql, + self.parent.repository.queryables['_all'], self.parent.repository.dbtype, + self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) + except Exception as err: + LOGGER.error('Invalid CQL query %s', tmp) + LOGGER.error('Error message: %s', err, exc_info=True) + return self.exceptionreport('InvalidParameterValue', + 'constraint', 'Invalid Filter syntax') elif self.parent.kvp['constraintlanguage'] == 'FILTER': # validate filter XML try: @@ -849,8 +858,10 @@ def getrecords(self): maxrecords=self.parent.kvp['maxrecords'], startposition=int(self.parent.kvp['startposition'])-1) except Exception as err: + LOGGER.debug('Invalid query syntax. 
Query: %s', self.parent.kvp['constraint']) + LOGGER.debug('Invalid query syntax. Result: %s', err) return self.exceptionreport('InvalidParameterValue', 'constraint', - 'Invalid query: %s' % err) + 'Invalid query syntax') if int(matched) == 0: returned = nextrecord = '0' @@ -1611,13 +1622,21 @@ def _parse_constraint(self, element): self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) except Exception as err: return 'Invalid Filter request: %s' % err + tmp = element.find(util.nspath_eval('csw30:CqlText', self.parent.context.namespaces)) if tmp is not None: LOGGER.debug('CQL specified: %s.', tmp.text) - query['type'] = 'cql' - query['where'] = self.parent._cql_update_queryables_mappings(tmp.text, - self.parent.repository.queryables['_all']) - query['values'] = {} + try: + LOGGER.debug('Transforming CQL into OGC Filter') + query['type'] = 'filter' + cql = cql2fes1(tmp.text, self.parent.context.namespaces) + query['where'], query['values'] = fes1.parse(cql, + self.parent.repository.queryables['_all'], self.parent.repository.dbtype, + self.parent.context.namespaces, self.parent.orm, self.parent.language['text'], self.parent.repository.fts) + except Exception as err: + LOGGER.error('Invalid CQL request: %s', tmp.text) + LOGGER.error('Error message: %s', err, exc_info=True) + return 'Invalid CQL request' return query def parse_postdata(self, postdata): diff --git a/tests/expected/suites_default_get_GetRecords-filter-cql-title-or-abstract.xml b/tests/expected/suites_default_get_GetRecords-filter-cql-title-or-abstract.xml new file mode 100644 index 000000000..2b1accdf9 --- /dev/null +++ b/tests/expected/suites_default_get_GetRecords-filter-cql-title-or-abstract.xml @@ -0,0 +1,23 @@ + + + + + + + urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f + http://purl.org/dc/dcmitype/Image + image/svg+xml + Lorem ipsum + GR-22 + Tourism--Greece + Quisque lacus diam, placerat mollis, pharetra in, commodo sed, augue. 
Duis iaculis arcu vel arcu. + + + urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2 + http://purl.org/dc/dcmitype/Image + Lorem ipsum dolor sit amet + image/jpeg + IT-FI + + + diff --git a/tests/expected/suites_default_get_GetRecords-filter-cql-title.xml b/tests/expected/suites_default_get_GetRecords-filter-cql-title.xml new file mode 100644 index 000000000..2b1accdf9 --- /dev/null +++ b/tests/expected/suites_default_get_GetRecords-filter-cql-title.xml @@ -0,0 +1,23 @@ + + + + + + + urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f + http://purl.org/dc/dcmitype/Image + image/svg+xml + Lorem ipsum + GR-22 + Tourism--Greece + Quisque lacus diam, placerat mollis, pharetra in, commodo sed, augue. Duis iaculis arcu vel arcu. + + + urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2 + http://purl.org/dc/dcmitype/Image + Lorem ipsum dolor sit amet + image/jpeg + IT-FI + + + diff --git a/tests/expected/suites_default_post_GetRecords-cql-title-and-abstract.xml b/tests/expected/suites_default_post_GetRecords-cql-title-and-abstract.xml new file mode 100644 index 000000000..4ed61a2ee --- /dev/null +++ b/tests/expected/suites_default_post_GetRecords-cql-title-and-abstract.xml @@ -0,0 +1,12 @@ + + + + + + + urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f + Lorem ipsum + http://purl.org/dc/dcmitype/Image + + + diff --git a/tests/suites/default/get/requests.txt b/tests/suites/default/get/requests.txt index c75edbd5c..97daa7a47 100644 --- a/tests/suites/default/get/requests.txt +++ b/tests/suites/default/get/requests.txt @@ -6,6 +6,8 @@ GetRecords-sortby-desc,PYCSW_SERVER?config=tests/suites/default/default.cfg&serv GetRecords-sortby-invalid-propertyname,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&resulttype=results&sortby=dc:titlei:A 
GetRecords-sortby-invalid-order,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&resulttype=results&sortby=dc:title:FOO GetRecords-filter,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&resulttype=results&constraintlanguage=FILTER&constraint=%3Cogc%3AFilter%20xmlns%3Aogc%3D%22http%3A%2F%2Fwww.opengis.net%2Fogc%22%3E%3Cogc%3APropertyIsEqualTo%3E%3Cogc%3APropertyName%3Edc%3Atitle%3C%2Fogc%3APropertyName%3E%3Cogc%3ALiteral%3ELorem%20ipsum%3C%2Fogc%3ALiteral%3E%3C%2Fogc%3APropertyIsEqualTo%3E%3C%2Fogc%3AFilter%3E +GetRecords-filter-cql-title,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&resulttype=results&constraintlanguage=CQL_TEXT&constraint=dc%3Atitle%20like%20%27%25lor%25%27 +GetRecords-filter-cql-title-or-abstract,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&resulttype=results&constraintlanguage=CQL_TEXT&constraint=dc%3Atitle%20like%20%27%25lor%25%27%20or%20dct%3Aabstract%20like%20%27%25pharetra%25%27 GetRecords-empty-maxrecords,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRecords&typenames=csw:Record&elementsetname=full&maxrecords= GetRepositoryItem,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRepositoryItem&id=urn:uuid:94bc9c83-97f6-4b40-9eb8-a8e8787a5c63 Exception-GetRepositoryItem-notfound,PYCSW_SERVER?config=tests/suites/default/default.cfg&service=CSW&version=2.0.2&request=GetRepositoryItem&id=NOTFOUND diff --git a/tests/suites/default/post/GetRecords-cql-title-and-abstract.xml b/tests/suites/default/post/GetRecords-cql-title-and-abstract.xml new file mode 100644 index 000000000..9bdb5795c --- /dev/null +++ 
b/tests/suites/default/post/GetRecords-cql-title-and-abstract.xml @@ -0,0 +1,9 @@ + + + + brief + + dc:title like '%ips%' and dct:abstract like '%pharetra%' + + +