QGIS API Documentation  2.99.0-Master (9caa722)
qgsgmlschema.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsgmlschema.cpp
3  --------------------------------------
4  Date : February 2013
5  Copyright : (C) 2013 by Radim Blazek
6  Email : [email protected]
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 #include "qgsgmlschema.h"
16 #include "qgsrectangle.h"
18 #include "qgserror.h"
19 #include "qgsgeometry.h"
20 #include "qgslogger.h"
22 #include <QBuffer>
23 #include <QList>
24 #include <QNetworkRequest>
25 #include <QNetworkReply>
26 #include <QProgressDialog>
27 #include <QSet>
28 #include <QSettings>
29 #include <QUrl>
30 
31 #include <limits>
32 
33 const char NS_SEPARATOR = '?';
34 const QString GML_NAMESPACE = QStringLiteral( "http://www.opengis.net/gml" );
35 
36 
37 QgsGmlFeatureClass::QgsGmlFeatureClass( const QString &name, const QString &path )
38  : mName( name )
39  , mPath( path )
40 {
41 }
42 
43 int QgsGmlFeatureClass::fieldIndex( const QString &name )
44 {
45  for ( int i = 0; i < mFields.size(); i++ )
46  {
47  if ( mFields[i].name() == name ) return i;
48  }
49  return -1;
50 }
51 
52 // --------------------------- QgsGmlSchema -------------------------------
54  : QObject()
55  , mFeatureCount( 0 )
56  , mLevel( 0 )
57  , mSkipLevel( std::numeric_limits<int>::max() )
58 {
59  mGeometryTypes << QStringLiteral( "Point" ) << QStringLiteral( "MultiPoint" )
60  << QStringLiteral( "LineString" ) << QStringLiteral( "MultiLineString" )
61  << QStringLiteral( "Polygon" ) << QStringLiteral( "MultiPolygon" );
62 }
63 
64 QString QgsGmlSchema::readAttribute( const QString &attributeName, const XML_Char **attr ) const
65 {
66  int i = 0;
67  while ( attr[i] )
68  {
69  if ( attributeName.compare( attr[i] ) == 0 )
70  {
71  return QString( attr[i + 1] );
72  }
73  i += 2;
74  }
75  return QString();
76 }
77 
78 bool QgsGmlSchema::parseXSD( const QByteArray &xml )
79 {
80  QDomDocument dom;
81  QString errorMsg;
82  int errorLine;
83  int errorColumn;
84  if ( !dom.setContent( xml, false, &errorMsg, &errorLine, &errorColumn ) )
85  {
86  // TODO: error
87  return false;
88  }
89 
90  QDomElement docElem = dom.documentElement();
91 
92  QList<QDomElement> elementElements = domElements( docElem, QStringLiteral( "element" ) );
93 
94  //QgsDebugMsg( QString( "%1 elemets read" ).arg( elementElements.size() ) );
95 
96  Q_FOREACH ( const QDomElement &elementElement, elementElements )
97  {
98  QString name = elementElement.attribute( QStringLiteral( "name" ) );
99  QString type = elementElement.attribute( QStringLiteral( "type" ) );
100 
101  QString gmlBaseType = xsdComplexTypeGmlBaseType( docElem, stripNS( type ) );
102  //QgsDebugMsg( QString( "gmlBaseType = %1" ).arg( gmlBaseType ) );
103  //QgsDebugMsg( QString( "name = %1 gmlBaseType = %2" ).arg( name ).arg( gmlBaseType ) );
104  // We should only use gml:AbstractFeatureType descendants which have
105  // ancestor listed in gml:FeatureAssociationType (featureMember) descendant
106  // But we could only loose some data if XSD was not correct, I think.
107 
108  if ( gmlBaseType == QLatin1String( "AbstractFeatureType" ) )
109  {
110  // Get feature type definition
111  QgsGmlFeatureClass featureClass( name, QLatin1String( "" ) );
112  xsdFeatureClass( docElem, stripNS( type ), featureClass );
113  mFeatureClassMap.insert( name, featureClass );
114  }
115  // A feature may have more geometries, we take just the first one
116  }
117 
118  return true;
119 }
120 
121 bool QgsGmlSchema::xsdFeatureClass( const QDomElement &element, const QString &typeName, QgsGmlFeatureClass &featureClass )
122 {
123  //QgsDebugMsg("typeName = " + typeName );
124  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), typeName );
125  if ( complexTypeElement.isNull() ) return false;
126 
127  // extension or restriction
128  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
129  if ( extrest.isNull() )
130  {
131  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
132  }
133  if ( extrest.isNull() ) return false;
134 
135  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
136  if ( extrestName == QLatin1String( "gml:AbstractFeatureType" ) )
137  {
138  // In theory we should add gml:AbstractFeatureType default attributes gml:description
139  // and gml:name but it does not seem to be a common practice and we would probably
140  // confuse most users
141  }
142  else
143  {
144  // Get attributes from extrest
145  if ( !xsdFeatureClass( element, stripNS( extrestName ), featureClass ) ) return false;
146  }
147 
148  // Supported geometry types
149  QStringList geometryPropertyTypes;
150  Q_FOREACH ( const QString &geom, mGeometryTypes )
151  {
152  geometryPropertyTypes << geom + "PropertyType";
153  }
154 
155  QStringList geometryAliases;
156  geometryAliases << QStringLiteral( "location" ) << QStringLiteral( "centerOf" ) << QStringLiteral( "position" ) << QStringLiteral( "extentOf" )
157  << QStringLiteral( "coverage" ) << QStringLiteral( "edgeOf" ) << QStringLiteral( "centerLineOf" ) << QStringLiteral( "multiLocation" )
158  << QStringLiteral( "multiCenterOf" ) << QStringLiteral( "multiPosition" ) << QStringLiteral( "multiCenterLineOf" )
159  << QStringLiteral( "multiEdgeOf" ) << QStringLiteral( "multiCoverage" ) << QStringLiteral( "multiExtentOf" );
160 
161  // Add attributes from current comple type
162  QList<QDomElement> sequenceElements = domElements( extrest, QStringLiteral( "sequence.element" ) );
163  Q_FOREACH ( const QDomElement &sequenceElement, sequenceElements )
164  {
165  QString fieldName = sequenceElement.attribute( QStringLiteral( "name" ) );
166  QString fieldTypeName = stripNS( sequenceElement.attribute( QStringLiteral( "type" ) ) );
167  QString ref = sequenceElement.attribute( QStringLiteral( "ref" ) );
168  //QgsDebugMsg ( QString("fieldName = %1 fieldTypeName = %2 ref = %3").arg(fieldName).arg(fieldTypeName).arg(ref) );
169 
170  if ( !ref.isEmpty() )
171  {
172  if ( ref.startsWith( QLatin1String( "gml:" ) ) )
173  {
174  if ( geometryAliases.contains( stripNS( ref ) ) )
175  {
176  featureClass.geometryAttributes().append( stripNS( ref ) );
177  }
178  else
179  {
180  QgsDebugMsg( QString( "Unknown referenced GML element: %1" ).arg( ref ) );
181  }
182  }
183  else
184  {
185  // TODO: get type from referenced element
186  QgsDebugMsg( QString( "field %1.%2 is referencing %3 - not supported" ).arg( typeName, fieldName ) );
187  }
188  continue;
189  }
190 
191  if ( fieldName.isEmpty() )
192  {
193  QgsDebugMsg( QString( "field in %1 without name" ).arg( typeName ) );
194  continue;
195  }
196 
197  // type is either type attribute
198  if ( fieldTypeName.isEmpty() )
199  {
200  // or type is inheriting from xs:simpleType
201  QDomElement sequenceElementRestriction = domElement( sequenceElement, QStringLiteral( "simpleType.restriction" ) );
202  fieldTypeName = stripNS( sequenceElementRestriction.attribute( QStringLiteral( "base" ) ) );
203  }
204 
205  QVariant::Type fieldType = QVariant::String;
206  if ( fieldTypeName.isEmpty() )
207  {
208  QgsDebugMsg( QString( "Cannot get %1.%2 field type" ).arg( typeName, fieldName ) );
209  }
210  else
211  {
212  if ( geometryPropertyTypes.contains( fieldTypeName ) )
213  {
214  // Geometry attribute
215  featureClass.geometryAttributes().append( fieldName );
216  continue;
217  }
218 
219  if ( fieldTypeName == QLatin1String( "decimal" ) )
220  {
221  fieldType = QVariant::Double;
222  }
223  else if ( fieldTypeName == QLatin1String( "integer" ) )
224  {
225  fieldType = QVariant::Int;
226  }
227  }
228 
229  QgsField field( fieldName, fieldType, fieldTypeName );
230  featureClass.fields().append( field );
231  }
232 
233  return true;
234 }
235 
236 QString QgsGmlSchema::xsdComplexTypeGmlBaseType( const QDomElement &element, const QString &name )
237 {
238  //QgsDebugMsg("name = " + name );
239  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), name );
240  if ( complexTypeElement.isNull() ) return QLatin1String( "" );
241 
242  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
243  if ( extrest.isNull() )
244  {
245  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
246  }
247  if ( extrest.isNull() ) return QLatin1String( "" );
248 
249  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
250  if ( extrestName.startsWith( QLatin1String( "gml:" ) ) )
251  {
252  // GML base type found
253  return stripNS( extrestName );
254  }
255  // Continue recursively until GML base type is reached
256  return xsdComplexTypeGmlBaseType( element, stripNS( extrestName ) );
257 }
258 
259 QString QgsGmlSchema::stripNS( const QString &name )
260 {
261  return name.contains( ':' ) ? name.section( ':', 1 ) : name;
262 }
263 
264 QList<QDomElement> QgsGmlSchema::domElements( const QDomElement &element, const QString &path )
265 {
266  QList<QDomElement> list;
267 
268  QStringList names = path.split( '.' );
269  if ( names.isEmpty() ) return list;
270  QString name = names.value( 0 );
271  names.removeFirst();
272 
273  QDomNode n1 = element.firstChild();
274  while ( !n1.isNull() )
275  {
276  QDomElement el = n1.toElement();
277  if ( !el.isNull() )
278  {
279  QString tagName = stripNS( el.tagName() );
280  if ( tagName == name )
281  {
282  if ( names.isEmpty() )
283  {
284  list.append( el );
285  }
286  else
287  {
288  list.append( domElements( el, names.join( QStringLiteral( "." ) ) ) );
289  }
290  }
291  }
292  n1 = n1.nextSibling();
293  }
294 
295  return list;
296 }
297 
298 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path )
299 {
300  return domElements( element, path ).value( 0 );
301 }
302 
303 QList<QDomElement> QgsGmlSchema::domElements( QList<QDomElement> &elements, const QString &attr, const QString &attrVal )
304 {
305  QList<QDomElement> list;
306  Q_FOREACH ( const QDomElement &el, elements )
307  {
308  if ( el.attribute( attr ) == attrVal )
309  {
310  list << el;
311  }
312  }
313  return list;
314 }
315 
316 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path, const QString &attr, const QString &attrVal )
317 {
318  QList<QDomElement> list = domElements( element, path );
319  return domElements( list, attr, attrVal ).value( 0 );
320 }
321 
322 bool QgsGmlSchema::guessSchema( const QByteArray &data )
323 {
324  mLevel = 0;
325  mSkipLevel = std::numeric_limits<int>::max();
326  XML_Parser p = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
327  XML_SetUserData( p, this );
328  XML_SetElementHandler( p, QgsGmlSchema::start, QgsGmlSchema::end );
329  XML_SetCharacterDataHandler( p, QgsGmlSchema::chars );
330  int atEnd = 1;
331  int res = XML_Parse( p, data.constData(), data.size(), atEnd );
332 
333  if ( res == 0 )
334  {
335  QString err = QString( XML_ErrorString( XML_GetErrorCode( p ) ) );
336  QgsDebugMsg( QString( "XML_Parse returned %1 error %2" ).arg( res ).arg( err ) );
337  mError = QgsError( err, QStringLiteral( "GML schema" ) );
338  mError.append( tr( "Cannot guess schema" ) );
339  }
340 
341  return res != 0;
342 }
343 
344 void QgsGmlSchema::startElement( const XML_Char *el, const XML_Char **attr )
345 {
346  Q_UNUSED( attr );
347  mLevel++;
348 
349  QString elementName = QString::fromUtf8( el );
350  QgsDebugMsgLevel( QString( "-> %1 %2 %3" ).arg( mLevel ).arg( elementName, mLevel >= mSkipLevel ? "skip" : "" ), 5 );
351 
352  if ( mLevel >= mSkipLevel )
353  {
354  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
355  return;
356  }
357 
358  mParsePathStack.append( elementName );
359  QString path = mParsePathStack.join( QStringLiteral( "." ) );
360 
361  QStringList splitName = elementName.split( NS_SEPARATOR );
362  QString localName = splitName.last();
363  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
364  //QgsDebugMsg( "ns = " + ns + " localName = " + localName );
365 
366  ParseMode parseMode = modeStackTop();
367  //QgsDebugMsg ( QString("localName = %1 parseMode = %2").arg(localName).arg(parseMode) );
368 
369  if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
370  {
371  // gml:boundedBy in feature or feature collection -> skip
372  mSkipLevel = mLevel + 1;
373  }
374  else if ( localName.compare( QLatin1String( "featureMembers" ), Qt::CaseInsensitive ) == 0 )
375  {
376  mParseModeStack.push( QgsGmlSchema::FeatureMembers );
377  }
378  // GML does not specify that gml:FeatureAssociationType elements should end
379  // with 'Member' apart standard gml:featureMember, but it is quite usual to
380  // that the names ends with 'Member', e.g.: osgb:topographicMember, cityMember,...
381  // so this is really fail if the name does not contain 'Member'
382 
383  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
384  {
385  mParseModeStack.push( QgsGmlSchema::FeatureMember );
386  }
387  // UMN Mapserver simple GetFeatureInfo response layer element (ends with _layer)
388  else if ( elementName.endsWith( QLatin1String( "_layer" ) ) )
389  {
390  // do nothing, we catch _feature children
391  }
392  // UMN Mapserver simple GetFeatureInfo response feature element (ends with _feature)
393  // or featureMember children.
394  // QGIS mapserver 2.2 GetFeatureInfo is using <Feature id="###"> for feature member,
395  // without any feature class distinction.
396  else if ( elementName.endsWith( QLatin1String( "_feature" ) )
397  || parseMode == QgsGmlSchema::FeatureMember
398  || parseMode == QgsGmlSchema::FeatureMembers
399  || localName.compare( QLatin1String( "feature" ), Qt::CaseInsensitive ) == 0 )
400  {
401  QgsDebugMsg( "is feature path = " + path );
402  if ( mFeatureClassMap.count( localName ) == 0 )
403  {
404  mFeatureClassMap.insert( localName, QgsGmlFeatureClass( localName, path ) );
405  }
406  mCurrentFeatureName = localName;
407  mParseModeStack.push( QgsGmlSchema::Feature );
408  }
409  else if ( parseMode == QgsGmlSchema::Attribute && ns == GML_NAMESPACE && mGeometryTypes.indexOf( localName ) >= 0 )
410  {
411  // Geometry (Point,MultiPoint,...) in geometry attribute
412  QStringList &geometryAttributes = mFeatureClassMap[mCurrentFeatureName].geometryAttributes();
413  if ( geometryAttributes.count( mAttributeName ) == 0 )
414  {
415  geometryAttributes.append( mAttributeName );
416  }
417  mSkipLevel = mLevel + 1; // no need to parse children
418  }
419  else if ( parseMode == QgsGmlSchema::Feature )
420  {
421  // An element in feature should be ordinary or geometry attribute
422  //QgsDebugMsg( "is attribute");
423 
424  // Usually localName is attribute name, e.g.
425  // <gml:desc>My description</gml:desc>
426  // but QGIS server (2.2) is using:
427  // <Attribute value="My description" name="desc"/>
428  QString name = readAttribute( QStringLiteral( "name" ), attr );
429  //QgsDebugMsg ( "attribute name = " + name );
430  if ( localName.compare( QLatin1String( "attribute" ), Qt::CaseInsensitive ) == 0
431  && !name.isEmpty() )
432  {
433  QString value = readAttribute( QStringLiteral( "value" ), attr );
434  //QgsDebugMsg ( "attribute value = " + value );
435  addAttribute( name, value );
436  }
437  else
438  {
439  mAttributeName = localName;
440  mParseModeStack.push( QgsGmlSchema::Attribute );
441  mStringCash.clear();
442  }
443  }
444 }
445 
446 void QgsGmlSchema::endElement( const XML_Char *el )
447 {
448  QString elementName = QString::fromUtf8( el );
449  QgsDebugMsgLevel( QString( "<- %1 %2" ).arg( mLevel ).arg( elementName ), 5 );
450 
451  if ( mLevel >= mSkipLevel )
452  {
453  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
454  mLevel--;
455  return;
456  }
457  else
458  {
459  // clear possible skip level
460  mSkipLevel = std::numeric_limits<int>::max();
461  }
462 
463  QStringList splitName = elementName.split( NS_SEPARATOR );
464  QString localName = splitName.last();
465  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
466 
467  QgsGmlSchema::ParseMode parseMode = modeStackTop();
468 
469  if ( parseMode == QgsGmlSchema::FeatureMembers )
470  {
471  modeStackPop();
472  }
473  else if ( parseMode == QgsGmlSchema::Attribute && localName == mAttributeName )
474  {
475  // End of attribute
476  //QgsDebugMsg("end attribute");
477  modeStackPop(); // go up to feature
478 
479  if ( mFeatureClassMap[mCurrentFeatureName].geometryAttributes().count( mAttributeName ) == 0 )
480  {
481  addAttribute( mAttributeName, mStringCash );
482  }
483  }
484  else if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
485  {
486  // was skipped
487  }
488  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
489  {
490  modeStackPop();
491  }
492  mParsePathStack.removeLast();
493  mLevel--;
494 }
495 
496 void QgsGmlSchema::characters( const XML_Char *chars, int len )
497 {
498  //QgsDebugMsg( QString("level %1 : %2").arg( mLevel ).arg( QString::fromUtf8( chars, len ) ) );
499  if ( mLevel >= mSkipLevel )
500  {
501  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
502  return;
503  }
504 
505  //save chars in mStringCash attribute mode for value type analysis
506  if ( modeStackTop() == QgsGmlSchema::Attribute )
507  {
508  mStringCash.append( QString::fromUtf8( chars, len ) );
509  }
510 }
511 
512 void QgsGmlSchema::addAttribute( const QString &name, const QString &value )
513 {
514  // It is not geometry attribute -> analyze value
515  bool ok;
516  value.toInt( &ok );
517  QVariant::Type type = QVariant::String;
518  if ( ok )
519  {
520  type = QVariant::Int;
521  }
522  else
523  {
524  value.toDouble( &ok );
525  if ( ok )
526  {
527  type = QVariant::Double;
528  }
529  }
530  //QgsDebugMsg( "mStringCash = " + mStringCash + " type = " + QVariant::typeToName( type ) );
531  //QMap<QString, QgsField> & fields = mFeatureClassMap[mCurrentFeatureName].fields();
532  QList<QgsField> &fields = mFeatureClassMap[mCurrentFeatureName].fields();
533  int fieldIndex = mFeatureClassMap[mCurrentFeatureName].fieldIndex( name );
534  if ( fieldIndex == -1 )
535  {
536  QgsField field( name, type );
537  fields.append( field );
538  }
539  else
540  {
541  QgsField &field = fields[fieldIndex];
542  // check if type is sufficient
543  if ( ( field.type() == QVariant::Int && ( type == QVariant::String || type == QVariant::Double ) ) ||
544  ( field.type() == QVariant::Double && type == QVariant::String ) )
545  {
546  field.setType( type );
547  }
548  }
549 }
550 
551 QStringList QgsGmlSchema::typeNames() const
552 {
553  return mFeatureClassMap.keys();
554 }
555 
556 QList<QgsField> QgsGmlSchema::fields( const QString &typeName )
557 {
558  if ( mFeatureClassMap.count( typeName ) == 0 ) return QList<QgsField>();
559  return mFeatureClassMap[typeName].fields();
560 }
561 
562 QStringList QgsGmlSchema::geometryAttributes( const QString &typeName )
563 {
564  if ( mFeatureClassMap.count( typeName ) == 0 ) return QStringList();
565  return mFeatureClassMap[typeName].geometryAttributes();
566 }
bool guessSchema(const QByteArray &data)
Guess GML schema from data if XSD does not exist.
#define QgsDebugMsg(str)
Definition: qgslogger.h:37
const QString GML_NAMESPACE
QList< QgsField > & fields()
Definition: qgsgmlschema.h:49
#define QgsDebugMsgLevel(str, level)
Definition: qgslogger.h:38
QList< QgsField > fields(const QString &typeName)
Get fields for type/class name parsed from GML or XSD.
void append(const QString &message, const QString &tag)
Append new error message.
Definition: qgserror.cpp:40
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:46
Description of feature class in GML.
Definition: qgsgmlschema.h:39
void setType(QVariant::Type type)
Set variant type.
Definition: qgsfield.cpp:139
QgsError is container for error messages (report).
Definition: qgserror.h:82
const char NS_SEPARATOR
QStringList & geometryAttributes()
Definition: qgsgmlschema.h:55
QgsGmlFeatureClass()=default
Constructor for QgsGmlFeatureClass.
QVariant::Type type() const
Gets variant type of the field as it will be retrieved from data source.
Definition: qgsfield.cpp:93
QStringList typeNames() const
Get list of dot separated paths to feature classes parsed from GML or XSD.
int fieldIndex(const QString &name)
bool parseXSD(const QByteArray &xml)
Get fields info from XSD.
QStringList geometryAttributes(const QString &typeName)
Get list of geometry attributes for type/class name.