LingFeatures.cc

Go to the documentation of this file.
00001 #include "LinguisticDefinition/LingFeatures.h"
00002 
00003 using namespace std;
00004 using namespace LinguisticDefinition;
00005 
00009 LingFeatures::LingFeatures() :
00010   d_lingDef(NULL),
00011   d_posDef(NULL) {
00012 }
00013 
00017 LingFeatures::~LingFeatures() {
00018 }
00019 
00023 LingFeatures::LingFeatures(const LingDef &lingDef) :
00024   d_lingDef(&lingDef),
00025   d_posDef(NULL) {
00026 }
00027 
00031 LingFeatures::LingFeatures(const LingDef::Pos &pos) :
00032   d_lingDef(pos.getLingDef()),
00033   d_posDef(&pos) {
00034   setDefaults();
00035 }
00036 
00040 const LingDef *LingFeatures::getLingDef() const {
00041   return d_lingDef;
00042 }
00043 
00047 const LingDef::Pos *LingFeatures::getPosDef() const {
00048   return d_posDef;
00049 }
00050 
00054 void LingFeatures::setPosDef(const LingDef::Pos &pos) {
00055   d_posDef = &pos;
00056   setDefaults();
00057 }
00058 
00062 void LingFeatures::execute(const std::string &expr) {
00063   // Make copy
00064   string expression = expr;
00065   string::size_type expressionLength = expression.length();
00066 
00067   if (expressionLength > 0) {
00068     string::size_type fieldBeginIndex = 0;
00069     bool inFieldName = false;
00070     bool positive = false;
00071 
00072     {for (string::size_type index = 0; index < expressionLength; index++) {
00073       char c = expression[index];
00074         
00075       if (!inFieldName) {
00076         if (c == '+') {
00077           positive = true;
00078           inFieldName = true;
00079         } else if (c == '-') {
00080           positive = false;
00081           inFieldName = true;
00082         }
00083         if (inFieldName) {
00084           fieldBeginIndex = index + 1;
00085         }
00086       } else {
00087         if (('a' <= c && c <= 'z') ||
00088             ('A' <= c && c <= 'Z') ||
00089             ('0' <= c && c <= '9') ||
00090             c == '_' ||
00091             c == '=' ||
00092             c == '$' ||
00093             c == '.') {
00094           if (index == expressionLength - 1) {
00095             index++;
00096             inFieldName = false;
00097           }
00098         } else {
00099           index--;
00100           inFieldName = false;
00101         }
00102         if (!inFieldName) {
00103           string fieldValue = expression.substr(fieldBeginIndex,
00104                                                 index + 1 - fieldBeginIndex);
00105           string::size_type equalsIndex = fieldValue.find('=');
00106           if (equalsIndex != string::npos) {
00107             string equalsValue = fieldValue.substr(equalsIndex + 1);
00108             fieldValue = fieldValue.substr(0, equalsIndex);
00109 
00110             if (equalsValue.length() > 1 && equalsValue[0] == '$') {
00111               string::size_type dotIndex = equalsValue.find('.');
00112 
00113               if (dotIndex != string::npos) {
00114                 string referenceTarget = equalsValue.substr(1, dotIndex - 1);
00115                 string referenceEnum = equalsValue.substr(dotIndex + 1);
00116 
00117                 //TODO: how to integrate? only matching...
00118 
00119               } else {
00120                 string referenceTarget = equalsValue.substr(1);
00121                 int referenceTargetInt = atoi(referenceTarget.c_str());
00122                 setReference(fieldValue, referenceTargetInt);
00123               }
00124                 
00125             } else {
00126               if (positive) {
00127                 set(equalsValue);
00128               } else {
00129                 setNegative(equalsValue);
00130               }
00131             }
00132               
00133           } else {
00134             if (positive) {
00135               set(fieldValue);
00136             } else {
00137               setNegative(fieldValue);
00138             }
00139           }
00140         }
00141       }
00142     }}
00143   }
00144 }
00145 
00149 void LingFeatures::set(const LingDef::Feature &feature) {
00150 //   if (feature.getType() == LingDef::Feature::BOOLEAN) {
00151     return setSub(feature);
00152 //   }
00153 }
00154 
00158 void LingFeatures::setNegative(const LingDef::Feature &feature) {
00159   unset(feature);
00160   d_negativeFeatures.insert(&feature);
00161 }
00162 
00166 void LingFeatures::unset(const LingDef::Feature &feature) {
00167   return unsetSub(feature);
00168 }
00169 
00173 void LingFeatures::setSub(const LingDef::Feature &feature) {
00174   if (has(feature)) {
00175     // In the case of vtree, add anyway
00176     d_features.insert(&feature);
00177 
00178   } else { 
00179     d_features.insert(&feature);
00180     d_negativeFeatures.erase(&feature);
00181 
00182     if (feature.getType() == LingDef::Feature::ENUM) {
00183       setDefaultForEnum(feature);
00184     }
00185 
00186     const LingDef::Feature *parentEnum = feature.getParentEnum();
00187     if (parentEnum != NULL) {
00188 
00189       if (!parentEnum->allowSeveralValues()) {
00190         // Must remove any old enum values for this feature
00191         d_enums.erase(parentEnum);
00192       }
00193 
00194       pair<const LingDef::Feature *, const LingDef::Feature *> p(parentEnum,
00195                                                                  &feature);
00196       d_enums.insert(p);
00197 
00198       if (!has(*parentEnum)) {
00199         d_features.insert(parentEnum);
00200 
00201       } else if (!parentEnum->allowSeveralValues()) {
00202         {for (LingDef::Feature::EnumChildrenIterator
00203                 it = parentEnum->enumChildrenBegin();
00204               it != parentEnum->enumChildrenEnd(); ++it) {
00205           if (*it != &feature) {
00206             const LingDef::Feature *siblingFeature = *it;
00207             d_features.erase(siblingFeature);
00208           }
00209         }}
00210       }
00211 
00212       // Unset conflicting features
00213       if (d_lingDef != NULL) {
00214         const LingDef::Pos::FeatureList *conflicts =
00215           d_lingDef->getConflicts(*parentEnum);
00216         if (conflicts != NULL) {
00217           {for (LingDef::Pos::FeatureIterator it = conflicts->begin();
00218                 it != conflicts->end(); ++it) {
00219             unset(**it);
00220           }}
00221         }
00222       }
00223 
00224     }
00225 
00226     // Unset conflicting features
00227     if (d_lingDef != NULL) {
00228       const LingDef::Pos::FeatureList *conflicts =
00229         d_lingDef->getConflicts(feature);
00230       if (conflicts != NULL) {
00231         {for (LingDef::Pos::FeatureIterator it = conflicts->begin();
00232               it != conflicts->end(); ++it) {
00233           unset(**it);
00234         }}
00235       }
00236     }
00237   }
00238 }
00239 
00243 void LingFeatures::unsetSub(const LingDef::Feature &feature) {
00244   d_negativeFeatures.erase(&feature);
00245 
00246   if (has(feature)) {
00247     d_features.erase(&feature);
00248 
00249     const LingDef::Feature *enumFeature = NULL;
00250     if (feature.getParentEnum() != NULL) {
00251       enumFeature = feature.getParentEnum();
00252     } else if (feature.getType() == LingDef::Feature::ENUM) {
00253       enumFeature = &feature;
00254     } else if (feature.getType() == LingDef::Feature::REFERENCE) {
00255       d_references.erase(&feature);
00256     }
00257     //TODO: vtree?
00258 
00259     if (enumFeature != NULL) {
00260 
00261       if (!enumFeature->allowSeveralValues()) {
00262         d_features.erase(enumFeature);
00263         d_enums.erase(enumFeature);
00264 
00265         {for (LingDef::Feature::EnumChildrenIterator
00266                 it = enumFeature->enumChildrenBegin();
00267               it != enumFeature->enumChildrenEnd(); ++it) {
00268           const LingDef::Feature *siblingFeature = *it;
00269           d_features.erase(siblingFeature);
00270         }}
00271 
00272         if (!enumFeature->allowNoValue()) {
00273           setDefaultForEnum(*enumFeature);
00274         }
00275 
00276       } else {
00277         {for (multimap<const LingDef::Feature *, const LingDef::Feature *>::
00278                 iterator it = d_enums.find(enumFeature);
00279               it != d_enums.end() && (*it).first == enumFeature; ) {
00280           const LingDef::Feature *f = (*it).second;
00281           multimap<const LingDef::Feature *, const LingDef::Feature *>::
00282             iterator prevIt = it;
00283           ++it;
00284           if (f == &feature || &feature == enumFeature) {
00285             d_enums.erase(prevIt);
00286             if (&feature == enumFeature) {
00287               d_features.erase(f);
00288             }
00289           }
00290         }}
00291       
00292         if (d_enums.find(enumFeature) == d_enums.end()) {
00293           // There was not enum value left
00294           d_features.erase(enumFeature);
00295           if (!enumFeature->allowNoValue()) {
00296             setDefaultForEnum(*enumFeature);
00297           }
00298         }
00299       }
00300 
00301     } else {
00302       const LingDef::Tree *tree = feature.getTree();
00303       if (tree != NULL) {
00304         //TODO: unset for this tree?
00305       }
00306     }
00307   }
00308 }
00309 
00313 bool LingFeatures::has(const LingDef::Feature &feature) const {
00314   return feature.isIn(d_features);
00315 }
00316 
00320 bool LingFeatures::hasNegative(const LingDef::Feature &feature) const {
00321   return d_negativeFeatures.find(&feature) != d_negativeFeatures.end();
00322 }
00323 
00327 bool LingFeatures::isDefined(const LingDef::Feature &feature) const {
00328   return hasNegative(feature) || has(feature);
00329 }
00330 
00334 void LingFeatures::set(const string &feature) {
00335   if (d_posDef != NULL) {
00336     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00337     if (featureDef != NULL) {
00338       return set(*featureDef);
00339     }
00340   }
00341 }
00342 
00346 void LingFeatures::setNegative(const string &feature) {
00347   if (d_posDef != NULL) {
00348     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00349     if (featureDef != NULL) {
00350       return setNegative(*featureDef);
00351     }
00352   }
00353 }
00354 
00358 void LingFeatures::unset(const string &feature) {
00359   if (d_posDef != NULL) {
00360     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00361     if (featureDef != NULL) {
00362       return unset(*featureDef);
00363     }
00364   }
00365 }
00366 
00370 void LingFeatures::setReference(const string &feature, int referenceValue) {
00371   if (d_posDef != NULL) {
00372     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00373     if (featureDef != NULL) {
00374       return setReference(*featureDef, referenceValue);
00375     }
00376   }
00377 }
00378 
00382 int LingFeatures::getReference(const string &feature) const {
00383   if (d_posDef != NULL) {
00384     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00385     if (featureDef != NULL) {
00386       return getReference(*featureDef);
00387     }
00388   }
00389   return 0;
00390 }
00391 
00395 void LingFeatures::getReferenceList(const string &feature,
00396                                     vector<int> &results) const {
00397   if (d_posDef != NULL) {
00398     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00399     if (featureDef != NULL) {
00400       getReferenceList(*featureDef, results);
00401     }
00402   }
00403 }
00404 
00408 bool LingFeatures::hasReference(const string &feature,
00409                                 int referenceValue) const {
00410   if (d_posDef != NULL) {
00411     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00412     if (featureDef != NULL) {
00413       return hasReference(*featureDef, referenceValue);
00414     }
00415   }
00416   return false;
00417 }
00418 
00422 void LingFeatures::setReference(const LingDef::Feature &feature,
00423                                 int referenceValue) {
00424   if (!feature.allowSeveralValues()) {
00425     // Must remove any old references for this feature
00426     d_references.erase(&feature);
00427   }
00428   d_references.insert(pair<const LingDef::Feature *, int>(&feature,
00429                                                           referenceValue));
00430   setSub(feature);
00431 }
00432 
00436 int LingFeatures::getReference(const LingDef::Feature &feature) const {
00437   multimap<const LingDef::Feature *, int>::const_iterator
00438     findIt = d_references.find(&feature);
00439   if (findIt != d_references.end()) {
00440     return (*findIt).second;
00441   }
00442   return 0;
00443 }
00444 
00448 void LingFeatures::getReferenceList(const LingDef::Feature &feature,
00449                                     vector<int> &results) const {
00450   {for (multimap<const LingDef::Feature *, int>::const_iterator
00451           it = d_references.find(&feature);
00452         it != d_references.end() && (*it).first == &feature; ++it) {
00453     results.push_back((*it).second);
00454   }}
00455 }
00456 
00460 bool LingFeatures::hasReference(const LingDef::Feature &feature,
00461                                 int referenceValue) const {
00462   {for (multimap<const LingDef::Feature *, int>::const_iterator
00463           it = d_references.find(&feature);
00464         it != d_references.end() && (*it).first == &feature; ++it) {
00465     if ((*it).second == referenceValue) {
00466       return true;
00467     }
00468   }}
00469   return false;
00470 }
00471 
00475 void LingFeatures::setEnum(const string &enumFeature,
00476                            const string &valueFeature) {
00477   if (d_posDef != NULL) {
00478     const LingDef::Feature *enumFeatureDef = d_posDef->getFeature(enumFeature);
00479     if (enumFeatureDef != NULL) {
00480       const LingDef::Feature *valueFeatureDef =
00481         d_posDef->getFeature(valueFeature);
00482 
00483       if (valueFeatureDef != NULL) {
00484         setEnum(*enumFeatureDef, *valueFeatureDef);
00485       }
00486     }
00487   }
00488 }
00489 
00493 void LingFeatures::setEnum(const LingDef::Feature &enumFeature,
00494                            const LingDef::Feature &valueFeature) {
00495   if (valueFeature.getParentEnum() == &enumFeature) {
00496     set(valueFeature);
00497   }
00498 }
00499 
00503 const LingDef::Feature *
00504 LingFeatures::getEnumValue(const string &enumFeature) const {
00505   if (d_posDef != NULL) {
00506     const LingDef::Feature *featureDef = d_posDef->getFeature(enumFeature);
00507     if (featureDef != NULL) {
00508       return getEnumValue(*featureDef);
00509     }
00510   }
00511   return NULL;
00512 }
00513 
00517 const LingDef::Feature *
00518 LingFeatures::getEnumValue(const LingDef::Feature &enumFeature) const {
00519   //TODO: warn if several
00520   multimap<const LingDef::Feature *, const LingDef::Feature *>::const_iterator
00521     findIt = d_enums.find(&enumFeature);
00522   if (findIt != d_enums.end()) {
00523     return (*findIt).second;
00524   }
00525   return NULL;
00526 }
00527 
00531 void LingFeatures::getEnumValues(const string &enumFeature,
00532                                  std::set<const LingDef::Feature *> &results)
00533   const {
00534   if (d_posDef != NULL) {
00535     const LingDef::Feature *featureDef = d_posDef->getFeature(enumFeature);
00536     if (featureDef != NULL) {
00537       getEnumValues(*featureDef, results);
00538     }
00539   }
00540 }
00541 
00545 void LingFeatures::getEnumValues(const LingDef::Feature &enumFeature,
00546                                  std::set<const LingDef::Feature *> &results)
00547   const {
00548   {for (multimap<const LingDef::Feature *, const LingDef::Feature *>::
00549           const_iterator it = d_enums.find(&enumFeature);
00550         it != d_enums.end() && (*it).first == &enumFeature; ++it) {
00551     results.insert((*it).second);
00552   }}
00553 }
00554 
00558 int LingFeatures::hasEnumValue(const std::string &enumValueName) const {
00559   if (d_posDef != NULL) {
00560     const LingDef::Feature *enumValueDef = d_posDef->getFeature(enumValueName);
00561     if (enumValueDef != NULL) {
00562       return hasEnumValue(*enumValueDef);
00563     }
00564   }
00565   return 0;
00566 }
00567 
00571 int LingFeatures::hasEnumValue(const LingDef::Feature &enumValueFeature)
00572   const {
00573   const LingDef::Feature *parentEnum = enumValueFeature.getParentEnum();
00574 
00575   if (parentEnum != NULL && has(enumValueFeature)) {
00576     std::set<const LingDef::Feature *> enumValues;
00577     getEnumValues(*parentEnum, enumValues);
00578     if (enumValues.size() > 1) {
00579       return 2;
00580     }
00581     return 1;
00582   }
00583 
00584   return 0;
00585 }
00586 
00590 bool LingFeatures::has(const string &feature) const {
00591   if (d_posDef != NULL) {
00592     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00593     if (featureDef != NULL) {
00594       return has(*featureDef);
00595     }
00596   }
00597   return false;
00598 }
00599 
00603 bool LingFeatures::hasNegative(const std::string &feature) const {
00604   if (d_posDef != NULL) {
00605     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00606     if (featureDef != NULL) {
00607       return hasNegative(*featureDef);
00608     }
00609   }
00610   return false;
00611 }
00612 
00616 bool LingFeatures::isDefined(const std::string &feature) const {
00617   if (d_posDef != NULL) {
00618     const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00619     if (featureDef != NULL) {
00620       return isDefined(*featureDef);
00621     }
00622   }
00623   return false;
00624 }
00625 
00629 const LingFeatures &LingFeatures::operator+=(const std::string &feature) {
00630   set(feature);
00631   return *this;
00632 }
00633 
00637 const LingFeatures &LingFeatures::operator+=(const LingDef::Feature &feature) {
00638   set(feature);
00639   return *this;
00640 }
00641 
00645 const LingFeatures &LingFeatures::operator-=(const std::string &feature) {
00646   setNegative(feature);
00647   return *this;
00648 }
00649 
00653 const LingFeatures &LingFeatures::operator-=(const LingDef::Feature &feature) {
00654   setNegative(feature);
00655   return *this;
00656 }
00657 
00661 bool LingFeatures::covers(const LingFeatures &other) const {
00662   if (d_posDef != other.d_posDef) {
00663     return false;
00664   }
00665 
00666   // The other LingFeatures must have all features to be covered
00667   {for (std::set<const LingDef::Feature *>::const_iterator
00668           it = d_features.begin();
00669         it != d_features.end(); ++it) {
00670     const LingDef::Feature *feature = *it;
00671     if (!other.has(*feature)) {
00672       return false;
00673     }
00674   }}
00675 
00676   return true;
00677 }
00678 
00682 LingFeatures::FeatureIterator LingFeatures::featuresBegin() const {
00683   return d_features.begin();
00684 }
00685 
00689 LingFeatures::FeatureIterator LingFeatures::featuresEnd() const {
00690   return d_features.end();
00691 }
00692 
00696 void LingFeatures::setDefaultForEnum(const LingDef::Feature &enumFeature) {
00697   {for (LingDef::Feature::EnumChildrenIterator
00698           it = enumFeature.enumChildrenBegin();
00699         it != enumFeature.enumChildrenEnd(); ++it) {
00700     const LingDef::Feature *enumValueFeature = *it;
00701     if (enumValueFeature->isDefault()) {
00702       set(*enumValueFeature);
00703       return;
00704     }
00705   }}
00706 }
00707 
00711 void LingFeatures::setDefaults() {
00712   const LingDef::Pos *posDef = d_posDef;
00713   while (posDef != NULL) {
00714     {for (LingDef::Pos::FeatureIterator it = posDef->defaultFeaturesBegin();
00715           it != posDef->defaultFeaturesEnd(); ++it) {
00716       const LingDef::Feature *feature = *it;
00717       set(*feature);
00718     }}
00719     posDef = posDef->getSuperPos();
00720   }
00721 }
00722 
00726 const std::string &LingFeatures::getLemma() const {
00727   return d_lemma;
00728 }
00729 
00733 void LingFeatures::setLemma(const std::string &val) {
00734   d_lemma = val;
00735 }
00736 
00740 const std::string &LingFeatures::getForm() const {
00741   return d_form;
00742 }
00743 
00747 void LingFeatures::setForm(const std::string &val) {
00748   d_form = val;
00749 }

Generated on Fri Jun 23 14:03:14 2006 for LinguisticDefinition by  doxygen 1.4.7