XmlLingDefFormatter.cc

Go to the documentation of this file.
00001 #include "LinguisticDefinition/XmlLingDefFormatter.h"
00002 
00003 using namespace std;
00004 using namespace LinguisticDefinition;
00005 
00009 XmlLingDefFormatter::XmlLingDefFormatter() {
00010 }
00011 
00015 LingDef XmlLingDefFormatter::createLingDef(const string &xmlData,
00016                                            const string &isoLanguageCode) {
00017   LingDef lingDef(isoLanguageCode);
00018   xmlDocPtr doc = xmlParseDoc((xmlChar *) xmlData.c_str());
00019   if (doc != NULL) {
00020     populateLingDef(doc, lingDef);
00021     xmlFreeDoc(doc);
00022   }
00023   return lingDef;
00024 }
00025 
00029 LingDef XmlLingDefFormatter::createLingDef(xmlDocPtr doc,
00030                                            const string &isoLanguageCode) {
00031   LingDef lingDef(isoLanguageCode);
00032   populateLingDef(doc, lingDef);
00033   return lingDef;
00034 }
00035 
00039 bool XmlLingDefFormatter::populateLingDef(const string &xmlData,
00040                                           LingDef &lingDef) {
00041   bool populationOk = false;
00042   xmlDocPtr doc = xmlParseDoc((xmlChar *) xmlData.c_str());
00043   if (doc != NULL) {
00044     populationOk = populateLingDef(doc, lingDef);
00045     xmlFreeDoc(doc);
00046   }
00047   return populationOk;
00048 }
00049 
00053 bool XmlLingDefFormatter::populateLingDef(xmlDocPtr doc,
00054                                           LingDef &lingDef) {
00055   bool populationOk = true;
00056 
00057   xmlXPathContextPtr xpContext = xmlXPathNewContext(doc);
00058 
00059   xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "/LingDef", xpContext);
00060   if (xpObj != NULL) {
00061     xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00062     if (nodeSet != NULL) {
00063       {for (int i = 0; i < nodeSet->nodeNr; i++) {
00064         if (langOk(nodeSet->nodeTab[i], lingDef)) {
00065           addLingDef(nodeSet->nodeTab[i], lingDef);
00066         }
00067       }}
00068     }
00069     xmlXPathFreeObject(xpObj);
00070   }
00071 
00072   xmlXPathFreeContext(xpContext);
00073 
00074   return populationOk;
00075 }
00076 
00080 bool XmlLingDefFormatter::langOk(xmlNodePtr node, const LingDef &lingDef) const {
00081   bool ok = true;
00082   if (node != NULL) {
00083     xmlChar *tmp = xmlGetProp(node, (xmlChar *) "lang");
00084     if (tmp != NULL) {
00085       string nodeLang = (char *) tmp;
00086       xmlFree(tmp);
00087 
00088       ok = langOk(nodeLang, lingDef.getIsoLanguageCode());
00089     }
00090   }
00091   return ok;
00092 }
00093 
00097 bool XmlLingDefFormatter::langOk(const string &nodeLang,
00098                                  const string &lang) const {
00099   bool ok = true;
00100   if (lang != "" && nodeLang != "") {
00101     string::size_type index = nodeLang.find(lang);
00102     if (index == string::npos) {
00103       // Not in list
00104       ok = false;
00105 
00106       // See if negation
00107       if (nodeLang.find('!') != string::npos) {
00108         ok = true;
00109       }
00110           
00111     } else {
00112       // In list
00113       if (index > 0 && nodeLang[index - 1] == '!') {
00114         // Not this language
00115         ok = false;
00116       }
00117     }
00118   }
00119   return ok;
00120 }
00121 
00125 bool XmlLingDefFormatter::addLingDef(xmlNodePtr lingDefNode, LingDef &lingDef) {
00126   xmlXPathContextPtr xpContext = xmlXPathNewContext(lingDefNode->doc);
00127   xpContext->node = lingDefNode;
00128 
00129   map<string, LingDef::Pos *> posIdMap;
00130   set<string> posIds;
00131   set<string> needsPosIds;
00132 
00133   // Get semantic trees
00134   {
00135     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Tree", xpContext);
00136     if (xpObj != NULL) {
00137       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00138       if (nodeSet != NULL) {
00139         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00140           if (langOk(nodeSet->nodeTab[i], lingDef)) {
00141             addTree(nodeSet->nodeTab[i], lingDef);
00142           }
00143         }}
00144       }
00145       xmlXPathFreeObject(xpObj);
00146     }
00147   }
00148   
00149   // Get parts of speech
00150   {
00151     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Pos", xpContext);
00152     if (xpObj != NULL) {
00153       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00154       if (nodeSet != NULL) {
00155         bool done;
00156         do {
00157           done = true;
00158           {for (int i = 0; i < nodeSet->nodeNr; i++) {
00159             xmlNodePtr posNode = nodeSet->nodeTab[i];
00160             if (langOk(posNode, lingDef)) {
00161               string posId;
00162               string posInheritsId;
00163               getPosIds(posNode, posId, posInheritsId);
00164 
00165               posIds.insert(posId);
00166               if (posInheritsId != "") {
00167                 needsPosIds.insert(posInheritsId);
00168               }
00169 
00170               if (posIdMap[posId] == NULL) {
00171                 bool parentOk = true;
00172                 LingDef::Pos *parentPos = NULL;
00173                 if (posInheritsId != "") {
00174                   parentPos = posIdMap[posInheritsId];
00175                   if (parentPos == NULL) {
00176                     parentOk = false;
00177                     done = false;
00178                   }
00179                 }
00180                 if (parentOk) {
00181                   posIdMap[posId] = addPos(posNode, lingDef, parentPos);
00182                 }
00183               }
00184             }
00185           }}
00186 
00187           if (!done) {
00188             // We are about to do another loop
00189             // Check if the pos id's we need exist
00190             {for (set<string>::iterator it = needsPosIds.begin();
00191                   it != needsPosIds.end(); ++it) {
00192               if (posIds.find(*it) == posIds.end()) {
00193                 cerr << "Super pos " << *it << " not defined" << endl;
00194                 done = true;
00195               }
00196             }}
00197           }
00198 
00199         } while (!done);
00200       }
00201       xmlXPathFreeObject(xpObj);
00202     }
00203   }
00204 
00205   xmlXPathFreeContext(xpContext);
00206 
00207   return true;
00208 }
00209 
00213 void XmlLingDefFormatter::getPosIds(xmlNodePtr posNode,
00214                                     string &id,
00215                                     string &inheritsId) {
00216   string posId;
00217   {
00218     xmlChar *tmp = xmlGetProp(posNode, (xmlChar *) "id");
00219     if (tmp != NULL) {
00220       id = (char *) tmp;
00221       xmlFree(tmp);
00222     }
00223   }
00224 
00225   string superPosId;
00226   {
00227     xmlChar *tmp = xmlGetProp(posNode, (xmlChar *) "inherits");
00228     if (tmp != NULL) {
00229       inheritsId = (char *) tmp;
00230       xmlFree(tmp);
00231     }
00232   }
00233 }
00234 
00238 LingDef::Pos *XmlLingDefFormatter::addPos(xmlNodePtr posNode, LingDef &lingDef,
00239                                           LingDef::Pos *parentPos) {
00240   xmlXPathContextPtr xpContext = xmlXPathNewContext(posNode->doc);
00241   xpContext->node = posNode;
00242 
00243   xmlNodePtr posNoteNode = NULL;
00244   {
00245     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Note", xpContext);
00246     if (xpObj != NULL) {
00247       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00248       if (nodeSet != NULL) {
00249         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00250           if (langOk(nodeSet->nodeTab[i], lingDef)) {
00251             posNoteNode = nodeSet->nodeTab[i];
00252           }
00253         }}
00254       }
00255       xmlXPathFreeObject(xpObj);
00256     }
00257   }
00258 
00259   xmlNodePtr posNameNode = NULL;
00260   {
00261     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Name", xpContext);
00262     if (xpObj != NULL) {
00263       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00264       if (nodeSet != NULL) {
00265         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00266           if (langOk(nodeSet->nodeTab[i], lingDef)) {
00267             posNameNode = nodeSet->nodeTab[i];
00268           }
00269         }}
00270       }
00271       xmlXPathFreeObject(xpObj);
00272     }
00273   }
00274 
00275   string posId;
00276   {
00277     xmlChar *tmp = xmlGetProp(posNode, (xmlChar *) "id");
00278     if (tmp != NULL) {
00279       posId = (char *) tmp;
00280       xmlFree(tmp);
00281     }
00282   }
00283 
00284   string superPosId;
00285   {
00286     xmlChar *tmp = xmlGetProp(posNode, (xmlChar *) "inherits");
00287     if (tmp != NULL) {
00288       superPosId = (char *) tmp;
00289       xmlFree(tmp);
00290     }
00291   }
00292 
00293   string posName;
00294   {
00295     if (posNameNode != NULL) {
00296       xmlChar *tmp = xmlNodeGetContent(posNameNode);
00297       if (tmp != NULL) {
00298         posName = (char *) tmp;
00299         xmlFree(tmp);
00300       }
00301     }
00302   }
00303 
00304   string posShortName;
00305   {
00306     if (posNameNode != NULL) {
00307       xmlChar *tmp = xmlGetProp(posNameNode, (xmlChar *) "short");
00308       if (tmp != NULL) {
00309         posShortName = (char *) tmp;
00310         xmlFree(tmp);
00311       }
00312     }
00313   }
00314 
00315   LingDef::Pos::Type posType = LingDef::Pos::ELEMENT;
00316   {
00317     xmlChar *tmp = xmlGetProp(posNode, (xmlChar *) "type");
00318     if (tmp != NULL) {
00319       if (!xmlStrcasecmp(tmp, (xmlChar *) "virtual")) {
00320         posType = LingDef::Pos::VIRTUAL;
00321 
00322       } else if (!xmlStrcasecmp(tmp, (xmlChar *) "syntagm")) {
00323         posType = LingDef::Pos::SYNTAGM;
00324       }
00325       xmlFree(tmp);
00326     }
00327   }
00328 
00329   // We now have enough information to create the Pos object
00330   LingDef::Pos *pos;
00331   if (posType == LingDef::Pos::VIRTUAL) {
00332     if (parentPos != NULL) {
00333       pos = &parentPos->createVirtualSubPos();
00334     } else {
00335       pos = &lingDef.createVirtualPos();
00336     }
00337   } else {
00338     if (parentPos != NULL) {
00339       pos = &parentPos->createSubPos(posName);
00340     } else {
00341       pos = &lingDef.createPos(posName);
00342     }
00343   }
00344 
00345   pos->setType(posType);
00346 
00347   if (posNoteNode != NULL) {
00348     {
00349       xmlChar *tmp = xmlNodeGetContent(posNoteNode);
00350       if (tmp != NULL) {
00351         pos->setNote((char *) tmp);
00352         xmlFree(tmp);
00353       }
00354     }
00355   }
00356 
00357   if (posShortName != "") {
00358     pos->setShortName(posShortName);
00359   }
00360 
00361   // Get features
00362   {
00363     xmlXPathObjectPtr xpObj =
00364       xmlXPathEval((xmlChar *) ".//Boolean|.//Enum|.//Reference|.//VTree",
00365                    xpContext);
00366     if (xpObj != NULL) {
00367       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00368       if (nodeSet != NULL) {
00369         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00370           if (langOk(nodeSet->nodeTab[i], lingDef)) {
00371             addFeature(nodeSet->nodeTab[i], *pos);
00372           }
00373         }}
00374       }
00375       xmlXPathFreeObject(xpObj);
00376     }
00377   }
00378 
00379   // Get conflicts
00380   {
00381     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) ".//Conflict",
00382                                            xpContext);
00383     if (xpObj != NULL) {
00384       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00385       if (nodeSet != NULL) {
00386         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00387           xmlNodePtr conflictNode = nodeSet->nodeTab[i];
00388           if (langOk(conflictNode, lingDef)) {
00389 
00390             string aName;
00391             {
00392               xmlChar *tmp = xmlGetProp(conflictNode, (xmlChar *) "aName");
00393               if (tmp != NULL) {
00394                 aName = (char *) tmp;
00395                 xmlFree(tmp);
00396               }
00397             }
00398 
00399             string bName;
00400             {
00401               xmlChar *tmp = xmlGetProp(conflictNode, (xmlChar *) "bName");
00402               if (tmp != NULL) {
00403                 bName = (char *) tmp;
00404                 xmlFree(tmp);
00405               }
00406             }
00407 
00408             const LingDef::Feature *aFeature = pos->getFeature(aName);
00409             const LingDef::Feature *bFeature = pos->getFeature(bName);
00410 
00411             if (aFeature == NULL) {
00412               //ERROR
00413               cerr << "Feature '" << aName
00414                    << "' in conflict not defined " << endl;
00415             } else if (bFeature == NULL) {
00416               //ERROR
00417               cerr << "Feature '" << bName
00418                    << "' in conflict not defined " << endl;
00419             } else {
00420               lingDef.addConflict(*aFeature, *bFeature);
00421             }
00422           }
00423         }}
00424       }
00425       xmlXPathFreeObject(xpObj);
00426     }
00427   }
00428 
00429   xmlXPathFreeContext(xpContext);
00430 
00431   return pos;
00432 }
00433 
00437 LingDef::Feature::Domain
00438 XmlLingDefFormatter::getFeatureDomain(xmlNodePtr featureNode) {
00439   string parentNodeName = (char *) featureNode->parent->name;
00440   if (parentNodeName == "Morphology") {
00441     return LingDef::Feature::MORPHO;
00442   } else if (parentNodeName == "Syntax") {
00443     return LingDef::Feature::SYNTAX;
00444   } else if (parentNodeName == "Semantics") {
00445     return LingDef::Feature::SEMANTIC;
00446   } else if (parentNodeName == "Misc") {
00447     return LingDef::Feature::MISC;
00448   }
00449   return LingDef::Feature::MISC;
00450 }
00451 
00455 LingDef::Feature::Type
00456 XmlLingDefFormatter::getFeatureType(xmlNodePtr featureNode) {
00457   string nodeName = (char *) featureNode->name;
00458   if (nodeName == "Boolean") {
00459     return LingDef::Feature::BOOLEAN;
00460   } else if (nodeName == "Enum") {
00461     return LingDef::Feature::ENUM;
00462   } else if (nodeName == "Reference") {
00463     return LingDef::Feature::REFERENCE;
00464   } else if (nodeName == "VTree") {
00465     return LingDef::Feature::VTREE;
00466   }
00467   return LingDef::Feature::BOOLEAN;
00468 }
00469 
00473 bool XmlLingDefFormatter::addFeature(xmlNodePtr featureNode, LingDef::Pos &pos) {
00474   xmlXPathContextPtr xpContext = xmlXPathNewContext(featureNode->doc);
00475   xpContext->node = featureNode;
00476 
00477   string featureName;
00478   {
00479     xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "name");
00480     if (tmp != NULL) {
00481       featureName = (char *) tmp;
00482       xmlFree(tmp);
00483     }
00484   }
00485 
00486   addFeature(featureNode,
00487              pos.createFeature(featureName,
00488                                getFeatureDomain(featureNode),
00489                                getFeatureType(featureNode)));
00490 
00491   xmlXPathFreeContext(xpContext);
00492 
00493   return true;
00494 }
00495 
00499 bool XmlLingDefFormatter::addFeature(xmlNodePtr featureNode,
00500                                      LingDef::Feature &feature) {
00501   xmlXPathContextPtr xpContext = xmlXPathNewContext(featureNode->doc);
00502   xpContext->node = featureNode;
00503 
00504   if (feature.getType() == LingDef::Feature::BOOLEAN) {
00505 
00506     {
00507       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "default");
00508       if (tmp != NULL) {
00509         feature.setDefault(xmlStrcmp(tmp, (xmlChar *) "true") == 0);
00510         xmlFree(tmp);
00511       }
00512     }
00513 
00514     {
00515       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "short");
00516       if (tmp != NULL) {
00517         feature.setShortName((char *) tmp);
00518         xmlFree(tmp);
00519       }
00520     }
00521 
00522   } else if (feature.getType() == LingDef::Feature::ENUM) {
00523 
00524     {
00525       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "allowNone");
00526       if (tmp != NULL) {
00527         feature.setAllowNoValue(xmlStrcmp(tmp, (xmlChar *) "true") == 0);
00528         xmlFree(tmp);
00529       }
00530     }
00531 
00532     {
00533       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "allowSeveral");
00534       if (tmp != NULL) {
00535         feature.setAllowSeveralValues(xmlStrcmp(tmp, (xmlChar *)"true") == 0);
00536         xmlFree(tmp);
00537       }
00538     }
00539 
00540     // Go through values
00541     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Value", xpContext);
00542     if (xpObj != NULL) {
00543       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00544       if (nodeSet != NULL) {
00545         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00546           xmlNodePtr valueNode = nodeSet->nodeTab[i];
00547           if (langOk(valueNode, *feature.getLingDef())) {
00548 
00549             string valueName;
00550             {
00551               xmlChar *tmp = xmlGetProp(valueNode, (xmlChar *) "name");
00552               if (tmp != NULL) {
00553                 valueName = (char *) tmp;
00554                 xmlFree(tmp);
00555               }
00556             }
00557 
00558             addFeature(valueNode, feature.createEnumValueFeature(valueName));
00559           }
00560         }}
00561       }
00562       xmlXPathFreeObject(xpObj);
00563     }
00564 
00565   } else if (feature.getType() == LingDef::Feature::REFERENCE) {
00566     {
00567       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "allowSeveral");
00568       if (tmp != NULL) {
00569         feature.setAllowSeveralValues(xmlStrcmp(tmp, (xmlChar *)"true") == 0);
00570         xmlFree(tmp);
00571       }
00572     }
00573 
00574   } else if (feature.getType() == LingDef::Feature::VTREE) {
00575     {
00576       xmlChar *tmp = xmlGetProp(featureNode, (xmlChar *) "treeId");
00577       if (tmp != NULL) {
00578         const LingDef::Tree *tree =
00579           feature.getLingDef()->getTree((char *) tmp);
00580         if (tree == NULL) {
00581           cerr << "Could not find tree " << (char *) tmp << endl;
00582         } else {
00583           feature.setTree(*tree);
00584         }
00585         xmlFree(tmp);
00586       }
00587     }
00588   }
00589 
00590   xmlXPathFreeContext(xpContext);
00591 
00592   return true;
00593 }
00594 
00598 bool XmlLingDefFormatter::addTree(xmlNodePtr treeNode, LingDef &lingDef) {
00599   string treeId;
00600   {
00601     xmlChar *tmp = xmlGetProp(treeNode, (xmlChar *) "id");
00602     if (tmp != NULL) {
00603       treeId = (char *) tmp;
00604       xmlFree(tmp);
00605     }
00606   }
00607 
00608   addTreeNode(treeNode, lingDef, lingDef.createTree(treeId).getRootNode());
00609 
00610   return true;
00611 }
00612 
00613 
00617 bool XmlLingDefFormatter::addTreeNode(xmlNodePtr nodeParentNode,
00618                                       LingDef &lingDef,
00619                                       LingDef::Tree::Node &parent) {
00620   xmlXPathContextPtr xpContext = xmlXPathNewContext(nodeParentNode->doc);
00621   xpContext->node = nodeParentNode;
00622 
00623   // Go through child nodes
00624   {
00625     xmlXPathObjectPtr xpObj = xmlXPathEval((xmlChar *) "Node", xpContext);
00626 
00627     if (xpObj != NULL) {
00628       xmlNodeSetPtr nodeSet = xpObj->nodesetval;
00629       if (nodeSet != NULL && nodeSet->nodeNr > 0) {
00630         {for (int i = 0; i < nodeSet->nodeNr; i++) {
00631           if (langOk(nodeSet->nodeTab[i], lingDef)) {
00632             xmlNodePtr nodeNode = nodeSet->nodeTab[i];
00633 
00634             string nodeName;
00635             {
00636               xmlChar *tmp = xmlGetProp(nodeNode, (xmlChar *) "name");
00637               if (tmp != NULL) {
00638                 nodeName = (char *) tmp;
00639                 xmlFree(tmp);
00640               }
00641             }
00642 
00643             addTreeNode(nodeNode, lingDef, parent.createChildNode(nodeName));
00644           }
00645         }}
00646       }
00647       xmlXPathFreeObject(xpObj);
00648     }
00649   }
00650 
00651   xmlXPathFreeContext(xpContext);
00652 
00653   return true;
00654 }
00655 
00659 string XmlLingDefFormatter::output(const LingDef &) const {
00660   return "";
00661 }
00662 
00666 void XmlLingDefFormatter::output(const LingDef &, ostream &) const {
00667 }

Generated on Fri Jun 23 14:03:15 2006 for LinguisticDefinition by  doxygen 1.4.7