# Source code for sgnlp.models.rst_pointer.utils

import argparse
import json
import re
from typing import Union

from .data_class import RstPointerParserTrainArgs, RstPointerSegmenterTrainArgs


def parse_args_and_load_config() -> Union[
    RstPointerParserTrainArgs, RstPointerSegmenterTrainArgs
]:
    """Parse the command-line arguments and build the matching train-args object.

    Two required options are read from the command line:

    * ``--train_type``: either ``"segmenter"`` or ``"parser"``.
    * ``--config_path``: path to a JSON file. Its top-level keys are passed
      as keyword arguments to the selected train-args class.

    Returns:
        Union[RstPointerParserTrainArgs, RstPointerSegmenterTrainArgs]: the
        train-args object selected by ``--train_type``, populated from the
        JSON config.

    Raises:
        SystemExit: raised by ``argparse`` when a required option is missing
            or ``--train_type`` is not one of the allowed choices.
        OSError: if the config file cannot be opened.
        json.JSONDecodeError: if the config file is not valid JSON.
        ValueError: if ``train_type`` is neither ``"parser"`` nor
            ``"segmenter"``.
    """
    parser = argparse.ArgumentParser(description="RST Training")
    parser.add_argument(
        "--train_type",
        type=str,
        choices=["segmenter", "parser"],
        required=True,
        help="Select which model to train.",
    )
    parser.add_argument(
        "--config_path", type=str, required=True, help="Path to config file."
    )
    args = parser.parse_args()

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same config file loads identically on every OS.
    with open(args.config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    if args.train_type == "parser":
        return RstPointerParserTrainArgs(**config)
    if args.train_type == "segmenter":
        return RstPointerSegmenterTrainArgs(**config)

    # argparse's ``choices`` already rejects other values; this guard only
    # protects against the two lists drifting apart in a later change.
    raise ValueError(f"Invalid train_type: {args.train_type}")
# Maps each relation label found in the discourse tree files to one of the
# 18 rhetorical classes. See Section 4.1 of Carlson and Marcu (2001),
# "Discourse Tagging Reference Manual", for more details.
#
# Keys are case-sensitive. Every "-e" suffixed label listed here maps to the
# same class as its un-suffixed counterpart when both are present, and "span"
# is passed through unchanged.
relation_to_rhetorical_class_map = {
    # Capitalised labels.
    "Analogy": "Comparison",
    "Cause-Result": "Cause",
    "Comment-Topic": "Topic-Comment",
    "Comparison": "Comparison",
    "Consequence": "Cause",
    "Contrast": "Contrast",
    "Disjunction": "Joint",
    "Evaluation": "Evaluation",
    "Interpretation": "Evaluation",
    "Inverted-Sequence": "Temporal",
    "List": "Joint",
    "Otherwise": "Condition",
    "Problem-Solution": "Topic-Comment",
    "Proportion": "Comparison",
    "Question-Answer": "Topic-Comment",
    "Reason": "Explanation",
    "Same-Unit": "Same-Unit",
    "Sequence": "Temporal",
    "Statement-Response": "Topic-Comment",
    "Temporal-Same-Time": "Temporal",
    "Topic-Comment": "Topic-Comment",
    # Lower-case labels.
    "analogy": "Comparison",
    "analogy-e": "Comparison",
    "antithesis": "Contrast",
    "antithesis-e": "Contrast",
    "attribution": "Attribution",
    "attribution-e": "Attribution",
    "attribution-n": "Attribution",
    "background": "Background",
    "background-e": "Background",
    "cause": "Cause",
    "circumstance": "Background",
    "circumstance-e": "Background",
    "comment": "Evaluation",
    "comment-e": "Evaluation",
    "comparison": "Comparison",
    "comparison-e": "Comparison",
    "concession": "Contrast",
    "concession-e": "Contrast",
    "conclusion": "Evaluation",
    "condition": "Condition",
    "condition-e": "Condition",
    "consequence-n": "Cause",
    "consequence-n-e": "Cause",
    "consequence-s": "Cause",
    "consequence-s-e": "Cause",
    "contingency": "Condition",
    "definition": "Elaboration",
    "definition-e": "Elaboration",
    "elaboration-additional": "Elaboration",
    "elaboration-additional-e": "Elaboration",
    "elaboration-general-specific": "Elaboration",
    "elaboration-general-specific-e": "Elaboration",
    "elaboration-object-attribute": "Elaboration",
    "elaboration-object-attribute-e": "Elaboration",
    "elaboration-part-whole": "Elaboration",
    "elaboration-part-whole-e": "Elaboration",
    # NOTE(review): only the "-e" variant of "elaboration-process-step" is
    # listed; confirm whether the un-suffixed label is intentionally absent.
    "elaboration-process-step-e": "Elaboration",
    "elaboration-set-member": "Elaboration",
    "elaboration-set-member-e": "Elaboration",
    "enablement": "Enablement",
    "enablement-e": "Enablement",
    "evaluation-n": "Evaluation",
    "evaluation-s": "Evaluation",
    "evaluation-s-e": "Evaluation",
    "evidence": "Explanation",
    "evidence-e": "Explanation",
    "example": "Elaboration",
    "example-e": "Elaboration",
    "explanation-argumentative": "Explanation",
    "explanation-argumentative-e": "Explanation",
    "hypothetical": "Condition",
    "interpretation-n": "Evaluation",
    "interpretation-s": "Evaluation",
    "interpretation-s-e": "Evaluation",
    "manner": "Manner-Means",
    "manner-e": "Manner-Means",
    "means": "Manner-Means",
    "means-e": "Manner-Means",
    "otherwise": "Condition",
    "preference": "Comparison",
    "preference-e": "Comparison",
    "purpose": "Enablement",
    "purpose-e": "Enablement",
    "question-answer-n": "Topic-Comment",
    "question-answer-s": "Topic-Comment",
    "reason": "Explanation",
    "reason-e": "Explanation",
    "restatement": "Summary",
    "restatement-e": "Summary",
    "result": "Cause",
    "result-e": "Cause",
    "rhetorical-question": "Topic-Comment",
    "span": "span",
    "temporal-after": "Temporal",
    "temporal-after-e": "Temporal",
    "temporal-before": "Temporal",
    "temporal-before-e": "Temporal",
    "temporal-same-time": "Temporal",
    "temporal-same-time-e": "Temporal",
}

# This is the list of classes used for training/testing in relation labelling.
# Each entry has the form "<class>_<nuclearity>" with nuclearity NN, NS or SN.
# Entries are looked up by position, so their order must not change.
relation_table = [
    "Attribution_NS",
    "Attribution_SN",
    "Background_NS",
    "Background_SN",
    "Cause_NN",
    "Cause_NS",
    "Cause_SN",
    "Comparison_NN",
    "Comparison_NS",
    "Comparison_SN",
    "Condition_NN",
    "Condition_NS",
    "Condition_SN",
    "Contrast_NN",
    "Contrast_NS",
    "Contrast_SN",
    "Elaboration_NS",
    "Elaboration_SN",
    "Enablement_NS",
    "Enablement_SN",
    "Evaluation_NN",
    "Evaluation_NS",
    "Evaluation_SN",
    "Explanation_NN",
    "Explanation_NS",
    "Explanation_SN",
    "Joint_NN",
    "Manner-Means_NS",
    "Manner-Means_SN",
    "Same-Unit_NN",
    "Summary_NS",
    "Summary_SN",
    "Temporal_NN",
    "Temporal_NS",
    "Temporal_SN",
    "TextualOrganization_NN",
    "Topic-Comment_NN",
    "Topic-Comment_NS",
    "Topic-Comment_SN",
]
def get_relation_and_nucleus(label_index):
    """Decode a relation-label index into nuclearity and relation per child.

    The entry ``relation_table[label_index]`` is split on ``"_"``; the first
    piece is the relation name and the second is the nuclearity code
    (``"NN"``, ``"NS"`` or ``"SN"``, read left to right).

    * ``NN``: both children are nuclei and both carry the relation name.
    * ``NS``: the left child is the nucleus and is labelled ``"span"``; the
      right child is the satellite and carries the relation name.
    * ``SN``: the left child is the satellite and carries the relation name;
      the right child is the nucleus and is labelled ``"span"``.

    Args:
        label_index: position of the label in ``relation_table``.

    Returns:
        tuple: ``(nuclearity_left, nuclearity_right, relation_left,
        relation_right)``, all strings.

    Raises:
        IndexError: if ``label_index`` is outside ``relation_table`` or the
            table entry contains no ``"_"`` separator.
        ValueError: if the nuclearity code is not ``"NN"``, ``"NS"`` or
            ``"SN"``.
    """
    label = relation_table[label_index]

    # A plain string split is enough for a literal "_" separator.
    pieces = label.split("_")
    relation = pieces[0]
    nuclearity = pieces[1]

    if nuclearity == "NN":
        return "Nucleus", "Nucleus", relation, relation
    if nuclearity == "NS":
        return "Nucleus", "Satellite", "span", relation
    if nuclearity == "SN":
        return "Satellite", "Nucleus", relation, "span"

    # Fail with a clear message instead of the UnboundLocalError that the
    # fall-through would otherwise trigger at the return statement.
    raise ValueError(
        f"Unknown nuclearity code {nuclearity!r} in relation label {label!r}"
    )