00001 #include "LinguisticDefinition/LingFeatures.h"
00002
00003 using namespace std;
00004 using namespace LinguisticDefinition;
00005
00009 LingFeatures::LingFeatures() :
00010 d_lingDef(NULL),
00011 d_posDef(NULL) {
00012 }
00013
00017 LingFeatures::~LingFeatures() {
00018 }
00019
00023 LingFeatures::LingFeatures(const LingDef &lingDef) :
00024 d_lingDef(&lingDef),
00025 d_posDef(NULL) {
00026 }
00027
00031 LingFeatures::LingFeatures(const LingDef::Pos &pos) :
00032 d_lingDef(pos.getLingDef()),
00033 d_posDef(&pos) {
00034 setDefaults();
00035 }
00036
00040 const LingDef *LingFeatures::getLingDef() const {
00041 return d_lingDef;
00042 }
00043
00047 const LingDef::Pos *LingFeatures::getPosDef() const {
00048 return d_posDef;
00049 }
00050
00054 void LingFeatures::setPosDef(const LingDef::Pos &pos) {
00055 d_posDef = &pos;
00056 setDefaults();
00057 }
00058
00062 void LingFeatures::execute(const std::string &expr) {
00063
00064 string expression = expr;
00065 string::size_type expressionLength = expression.length();
00066
00067 if (expressionLength > 0) {
00068 string::size_type fieldBeginIndex = 0;
00069 bool inFieldName = false;
00070 bool positive = false;
00071
00072 {for (string::size_type index = 0; index < expressionLength; index++) {
00073 char c = expression[index];
00074
00075 if (!inFieldName) {
00076 if (c == '+') {
00077 positive = true;
00078 inFieldName = true;
00079 } else if (c == '-') {
00080 positive = false;
00081 inFieldName = true;
00082 }
00083 if (inFieldName) {
00084 fieldBeginIndex = index + 1;
00085 }
00086 } else {
00087 if (('a' <= c && c <= 'z') ||
00088 ('A' <= c && c <= 'Z') ||
00089 ('0' <= c && c <= '9') ||
00090 c == '_' ||
00091 c == '=' ||
00092 c == '$' ||
00093 c == '.') {
00094 if (index == expressionLength - 1) {
00095 index++;
00096 inFieldName = false;
00097 }
00098 } else {
00099 index--;
00100 inFieldName = false;
00101 }
00102 if (!inFieldName) {
00103 string fieldValue = expression.substr(fieldBeginIndex,
00104 index + 1 - fieldBeginIndex);
00105 string::size_type equalsIndex = fieldValue.find('=');
00106 if (equalsIndex != string::npos) {
00107 string equalsValue = fieldValue.substr(equalsIndex + 1);
00108 fieldValue = fieldValue.substr(0, equalsIndex);
00109
00110 if (equalsValue.length() > 1 && equalsValue[0] == '$') {
00111 string::size_type dotIndex = equalsValue.find('.');
00112
00113 if (dotIndex != string::npos) {
00114 string referenceTarget = equalsValue.substr(1, dotIndex - 1);
00115 string referenceEnum = equalsValue.substr(dotIndex + 1);
00116
00117
00118
00119 } else {
00120 string referenceTarget = equalsValue.substr(1);
00121 int referenceTargetInt = atoi(referenceTarget.c_str());
00122 setReference(fieldValue, referenceTargetInt);
00123 }
00124
00125 } else {
00126 if (positive) {
00127 set(equalsValue);
00128 } else {
00129 setNegative(equalsValue);
00130 }
00131 }
00132
00133 } else {
00134 if (positive) {
00135 set(fieldValue);
00136 } else {
00137 setNegative(fieldValue);
00138 }
00139 }
00140 }
00141 }
00142 }}
00143 }
00144 }
00145
00149 void LingFeatures::set(const LingDef::Feature &feature) {
00150
00151 return setSub(feature);
00152
00153 }
00154
00158 void LingFeatures::setNegative(const LingDef::Feature &feature) {
00159 unset(feature);
00160 d_negativeFeatures.insert(&feature);
00161 }
00162
00166 void LingFeatures::unset(const LingDef::Feature &feature) {
00167 return unsetSub(feature);
00168 }
00169
00173 void LingFeatures::setSub(const LingDef::Feature &feature) {
00174 if (has(feature)) {
00175
00176 d_features.insert(&feature);
00177
00178 } else {
00179 d_features.insert(&feature);
00180 d_negativeFeatures.erase(&feature);
00181
00182 if (feature.getType() == LingDef::Feature::ENUM) {
00183 setDefaultForEnum(feature);
00184 }
00185
00186 const LingDef::Feature *parentEnum = feature.getParentEnum();
00187 if (parentEnum != NULL) {
00188
00189 if (!parentEnum->allowSeveralValues()) {
00190
00191 d_enums.erase(parentEnum);
00192 }
00193
00194 pair<const LingDef::Feature *, const LingDef::Feature *> p(parentEnum,
00195 &feature);
00196 d_enums.insert(p);
00197
00198 if (!has(*parentEnum)) {
00199 d_features.insert(parentEnum);
00200
00201 } else if (!parentEnum->allowSeveralValues()) {
00202 {for (LingDef::Feature::EnumChildrenIterator
00203 it = parentEnum->enumChildrenBegin();
00204 it != parentEnum->enumChildrenEnd(); ++it) {
00205 if (*it != &feature) {
00206 const LingDef::Feature *siblingFeature = *it;
00207 d_features.erase(siblingFeature);
00208 }
00209 }}
00210 }
00211
00212
00213 if (d_lingDef != NULL) {
00214 const LingDef::Pos::FeatureList *conflicts =
00215 d_lingDef->getConflicts(*parentEnum);
00216 if (conflicts != NULL) {
00217 {for (LingDef::Pos::FeatureIterator it = conflicts->begin();
00218 it != conflicts->end(); ++it) {
00219 unset(**it);
00220 }}
00221 }
00222 }
00223
00224 }
00225
00226
00227 if (d_lingDef != NULL) {
00228 const LingDef::Pos::FeatureList *conflicts =
00229 d_lingDef->getConflicts(feature);
00230 if (conflicts != NULL) {
00231 {for (LingDef::Pos::FeatureIterator it = conflicts->begin();
00232 it != conflicts->end(); ++it) {
00233 unset(**it);
00234 }}
00235 }
00236 }
00237 }
00238 }
00239
00243 void LingFeatures::unsetSub(const LingDef::Feature &feature) {
00244 d_negativeFeatures.erase(&feature);
00245
00246 if (has(feature)) {
00247 d_features.erase(&feature);
00248
00249 const LingDef::Feature *enumFeature = NULL;
00250 if (feature.getParentEnum() != NULL) {
00251 enumFeature = feature.getParentEnum();
00252 } else if (feature.getType() == LingDef::Feature::ENUM) {
00253 enumFeature = &feature;
00254 } else if (feature.getType() == LingDef::Feature::REFERENCE) {
00255 d_references.erase(&feature);
00256 }
00257
00258
00259 if (enumFeature != NULL) {
00260
00261 if (!enumFeature->allowSeveralValues()) {
00262 d_features.erase(enumFeature);
00263 d_enums.erase(enumFeature);
00264
00265 {for (LingDef::Feature::EnumChildrenIterator
00266 it = enumFeature->enumChildrenBegin();
00267 it != enumFeature->enumChildrenEnd(); ++it) {
00268 const LingDef::Feature *siblingFeature = *it;
00269 d_features.erase(siblingFeature);
00270 }}
00271
00272 if (!enumFeature->allowNoValue()) {
00273 setDefaultForEnum(*enumFeature);
00274 }
00275
00276 } else {
00277 {for (multimap<const LingDef::Feature *, const LingDef::Feature *>::
00278 iterator it = d_enums.find(enumFeature);
00279 it != d_enums.end() && (*it).first == enumFeature; ) {
00280 const LingDef::Feature *f = (*it).second;
00281 multimap<const LingDef::Feature *, const LingDef::Feature *>::
00282 iterator prevIt = it;
00283 ++it;
00284 if (f == &feature || &feature == enumFeature) {
00285 d_enums.erase(prevIt);
00286 if (&feature == enumFeature) {
00287 d_features.erase(f);
00288 }
00289 }
00290 }}
00291
00292 if (d_enums.find(enumFeature) == d_enums.end()) {
00293
00294 d_features.erase(enumFeature);
00295 if (!enumFeature->allowNoValue()) {
00296 setDefaultForEnum(*enumFeature);
00297 }
00298 }
00299 }
00300
00301 } else {
00302 const LingDef::Tree *tree = feature.getTree();
00303 if (tree != NULL) {
00304
00305 }
00306 }
00307 }
00308 }
00309
00313 bool LingFeatures::has(const LingDef::Feature &feature) const {
00314 return feature.isIn(d_features);
00315 }
00316
00320 bool LingFeatures::hasNegative(const LingDef::Feature &feature) const {
00321 return d_negativeFeatures.find(&feature) != d_negativeFeatures.end();
00322 }
00323
00327 bool LingFeatures::isDefined(const LingDef::Feature &feature) const {
00328 return hasNegative(feature) || has(feature);
00329 }
00330
00334 void LingFeatures::set(const string &feature) {
00335 if (d_posDef != NULL) {
00336 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00337 if (featureDef != NULL) {
00338 return set(*featureDef);
00339 }
00340 }
00341 }
00342
00346 void LingFeatures::setNegative(const string &feature) {
00347 if (d_posDef != NULL) {
00348 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00349 if (featureDef != NULL) {
00350 return setNegative(*featureDef);
00351 }
00352 }
00353 }
00354
00358 void LingFeatures::unset(const string &feature) {
00359 if (d_posDef != NULL) {
00360 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00361 if (featureDef != NULL) {
00362 return unset(*featureDef);
00363 }
00364 }
00365 }
00366
00370 void LingFeatures::setReference(const string &feature, int referenceValue) {
00371 if (d_posDef != NULL) {
00372 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00373 if (featureDef != NULL) {
00374 return setReference(*featureDef, referenceValue);
00375 }
00376 }
00377 }
00378
00382 int LingFeatures::getReference(const string &feature) const {
00383 if (d_posDef != NULL) {
00384 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00385 if (featureDef != NULL) {
00386 return getReference(*featureDef);
00387 }
00388 }
00389 return 0;
00390 }
00391
00395 void LingFeatures::getReferenceList(const string &feature,
00396 vector<int> &results) const {
00397 if (d_posDef != NULL) {
00398 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00399 if (featureDef != NULL) {
00400 getReferenceList(*featureDef, results);
00401 }
00402 }
00403 }
00404
00408 bool LingFeatures::hasReference(const string &feature,
00409 int referenceValue) const {
00410 if (d_posDef != NULL) {
00411 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00412 if (featureDef != NULL) {
00413 return hasReference(*featureDef, referenceValue);
00414 }
00415 }
00416 return false;
00417 }
00418
00422 void LingFeatures::setReference(const LingDef::Feature &feature,
00423 int referenceValue) {
00424 if (!feature.allowSeveralValues()) {
00425
00426 d_references.erase(&feature);
00427 }
00428 d_references.insert(pair<const LingDef::Feature *, int>(&feature,
00429 referenceValue));
00430 setSub(feature);
00431 }
00432
00436 int LingFeatures::getReference(const LingDef::Feature &feature) const {
00437 multimap<const LingDef::Feature *, int>::const_iterator
00438 findIt = d_references.find(&feature);
00439 if (findIt != d_references.end()) {
00440 return (*findIt).second;
00441 }
00442 return 0;
00443 }
00444
00448 void LingFeatures::getReferenceList(const LingDef::Feature &feature,
00449 vector<int> &results) const {
00450 {for (multimap<const LingDef::Feature *, int>::const_iterator
00451 it = d_references.find(&feature);
00452 it != d_references.end() && (*it).first == &feature; ++it) {
00453 results.push_back((*it).second);
00454 }}
00455 }
00456
00460 bool LingFeatures::hasReference(const LingDef::Feature &feature,
00461 int referenceValue) const {
00462 {for (multimap<const LingDef::Feature *, int>::const_iterator
00463 it = d_references.find(&feature);
00464 it != d_references.end() && (*it).first == &feature; ++it) {
00465 if ((*it).second == referenceValue) {
00466 return true;
00467 }
00468 }}
00469 return false;
00470 }
00471
00475 void LingFeatures::setEnum(const string &enumFeature,
00476 const string &valueFeature) {
00477 if (d_posDef != NULL) {
00478 const LingDef::Feature *enumFeatureDef = d_posDef->getFeature(enumFeature);
00479 if (enumFeatureDef != NULL) {
00480 const LingDef::Feature *valueFeatureDef =
00481 d_posDef->getFeature(valueFeature);
00482
00483 if (valueFeatureDef != NULL) {
00484 setEnum(*enumFeatureDef, *valueFeatureDef);
00485 }
00486 }
00487 }
00488 }
00489
00493 void LingFeatures::setEnum(const LingDef::Feature &enumFeature,
00494 const LingDef::Feature &valueFeature) {
00495 if (valueFeature.getParentEnum() == &enumFeature) {
00496 set(valueFeature);
00497 }
00498 }
00499
00503 const LingDef::Feature *
00504 LingFeatures::getEnumValue(const string &enumFeature) const {
00505 if (d_posDef != NULL) {
00506 const LingDef::Feature *featureDef = d_posDef->getFeature(enumFeature);
00507 if (featureDef != NULL) {
00508 return getEnumValue(*featureDef);
00509 }
00510 }
00511 return NULL;
00512 }
00513
00517 const LingDef::Feature *
00518 LingFeatures::getEnumValue(const LingDef::Feature &enumFeature) const {
00519
00520 multimap<const LingDef::Feature *, const LingDef::Feature *>::const_iterator
00521 findIt = d_enums.find(&enumFeature);
00522 if (findIt != d_enums.end()) {
00523 return (*findIt).second;
00524 }
00525 return NULL;
00526 }
00527
00531 void LingFeatures::getEnumValues(const string &enumFeature,
00532 std::set<const LingDef::Feature *> &results)
00533 const {
00534 if (d_posDef != NULL) {
00535 const LingDef::Feature *featureDef = d_posDef->getFeature(enumFeature);
00536 if (featureDef != NULL) {
00537 getEnumValues(*featureDef, results);
00538 }
00539 }
00540 }
00541
00545 void LingFeatures::getEnumValues(const LingDef::Feature &enumFeature,
00546 std::set<const LingDef::Feature *> &results)
00547 const {
00548 {for (multimap<const LingDef::Feature *, const LingDef::Feature *>::
00549 const_iterator it = d_enums.find(&enumFeature);
00550 it != d_enums.end() && (*it).first == &enumFeature; ++it) {
00551 results.insert((*it).second);
00552 }}
00553 }
00554
00558 int LingFeatures::hasEnumValue(const std::string &enumValueName) const {
00559 if (d_posDef != NULL) {
00560 const LingDef::Feature *enumValueDef = d_posDef->getFeature(enumValueName);
00561 if (enumValueDef != NULL) {
00562 return hasEnumValue(*enumValueDef);
00563 }
00564 }
00565 return 0;
00566 }
00567
00571 int LingFeatures::hasEnumValue(const LingDef::Feature &enumValueFeature)
00572 const {
00573 const LingDef::Feature *parentEnum = enumValueFeature.getParentEnum();
00574
00575 if (parentEnum != NULL && has(enumValueFeature)) {
00576 std::set<const LingDef::Feature *> enumValues;
00577 getEnumValues(*parentEnum, enumValues);
00578 if (enumValues.size() > 1) {
00579 return 2;
00580 }
00581 return 1;
00582 }
00583
00584 return 0;
00585 }
00586
00590 bool LingFeatures::has(const string &feature) const {
00591 if (d_posDef != NULL) {
00592 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00593 if (featureDef != NULL) {
00594 return has(*featureDef);
00595 }
00596 }
00597 return false;
00598 }
00599
00603 bool LingFeatures::hasNegative(const std::string &feature) const {
00604 if (d_posDef != NULL) {
00605 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00606 if (featureDef != NULL) {
00607 return hasNegative(*featureDef);
00608 }
00609 }
00610 return false;
00611 }
00612
00616 bool LingFeatures::isDefined(const std::string &feature) const {
00617 if (d_posDef != NULL) {
00618 const LingDef::Feature *featureDef = d_posDef->getFeature(feature);
00619 if (featureDef != NULL) {
00620 return isDefined(*featureDef);
00621 }
00622 }
00623 return false;
00624 }
00625
00629 const LingFeatures &LingFeatures::operator+=(const std::string &feature) {
00630 set(feature);
00631 return *this;
00632 }
00633
00637 const LingFeatures &LingFeatures::operator+=(const LingDef::Feature &feature) {
00638 set(feature);
00639 return *this;
00640 }
00641
00645 const LingFeatures &LingFeatures::operator-=(const std::string &feature) {
00646 setNegative(feature);
00647 return *this;
00648 }
00649
00653 const LingFeatures &LingFeatures::operator-=(const LingDef::Feature &feature) {
00654 setNegative(feature);
00655 return *this;
00656 }
00657
00661 bool LingFeatures::covers(const LingFeatures &other) const {
00662 if (d_posDef != other.d_posDef) {
00663 return false;
00664 }
00665
00666
00667 {for (std::set<const LingDef::Feature *>::const_iterator
00668 it = d_features.begin();
00669 it != d_features.end(); ++it) {
00670 const LingDef::Feature *feature = *it;
00671 if (!other.has(*feature)) {
00672 return false;
00673 }
00674 }}
00675
00676 return true;
00677 }
00678
00682 LingFeatures::FeatureIterator LingFeatures::featuresBegin() const {
00683 return d_features.begin();
00684 }
00685
00689 LingFeatures::FeatureIterator LingFeatures::featuresEnd() const {
00690 return d_features.end();
00691 }
00692
00696 void LingFeatures::setDefaultForEnum(const LingDef::Feature &enumFeature) {
00697 {for (LingDef::Feature::EnumChildrenIterator
00698 it = enumFeature.enumChildrenBegin();
00699 it != enumFeature.enumChildrenEnd(); ++it) {
00700 const LingDef::Feature *enumValueFeature = *it;
00701 if (enumValueFeature->isDefault()) {
00702 set(*enumValueFeature);
00703 return;
00704 }
00705 }}
00706 }
00707
00711 void LingFeatures::setDefaults() {
00712 const LingDef::Pos *posDef = d_posDef;
00713 while (posDef != NULL) {
00714 {for (LingDef::Pos::FeatureIterator it = posDef->defaultFeaturesBegin();
00715 it != posDef->defaultFeaturesEnd(); ++it) {
00716 const LingDef::Feature *feature = *it;
00717 set(*feature);
00718 }}
00719 posDef = posDef->getSuperPos();
00720 }
00721 }
00722
00726 const std::string &LingFeatures::getLemma() const {
00727 return d_lemma;
00728 }
00729
00733 void LingFeatures::setLemma(const std::string &val) {
00734 d_lemma = val;
00735 }
00736
00740 const std::string &LingFeatures::getForm() const {
00741 return d_form;
00742 }
00743
00747 void LingFeatures::setForm(const std::string &val) {
00748 d_form = val;
00749 }