使用Python编写的上下文相关实用函数

发布时间：2023-12-11 09:23:59

上下文相关实用函数（Context-Aware Utilities）是在处理自然语言文本时非常有用的一种工具。这些函数能够根据文本的上下文信息来进行更加准确的处理和分析。下面是一些使用Python编写的上下文相关实用函数的示例。

1. 处理日期和时间

import datetime
import re

def extract_dates(text):
    """Return every valid ``YYYY-MM-DD`` date found in *text*.

    Args:
        text: The string to scan.

    Returns:
        A list of ``datetime.date`` objects in order of appearance.
        Candidates that have the right shape but are not real calendar
        dates (for example ``2022-13-40``) are skipped.
    """
    # The digit lookarounds keep the pattern from matching inside a longer
    # digit run (e.g. the "2022-01-20" embedded in "12022-01-205").
    pattern = r'(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)'
    dates = []
    for candidate in re.findall(pattern, text):
        try:
            parsed = datetime.datetime.strptime(candidate, '%Y-%m-%d')
        except ValueError:
            # Right shape, but not a real calendar date: ignore it.
            continue
        dates.append(parsed.date())
    return dates

# Example usage
text = "The meeting is scheduled for 2022-01-20."
dates = extract_dates(text)
print(dates)
# Output: [datetime.date(2022, 1, 20)]

2. 处理语义相似度

from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Pipelines are expensive to build, so keep one per checkpoint name.
_SIMILARITY_PIPELINES = {}


def semantic_similarity(text1, text2, model_name="cross-encoder/stsb-roberta-base"):
    """Score how semantically similar two texts are.

    The two texts are fed to the model together as one sentence pair.
    The default checkpoint is a cross-encoder fine-tuned for sentence-pair
    similarity; a plain ``bert-base-uncased`` checkpoint has no trained
    classification head, so its score would be meaningless.

    Args:
        text1: First text.
        text2: Second text.
        model_name: Hugging Face checkpoint to load. It should be a
            sequence-classification model trained to score sentence pairs.

    Returns:
        The ``score`` value reported by the text-classification pipeline
        for the pair (a float).
    """
    scorer = _SIMILARITY_PIPELINES.get(model_name)
    if scorer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        scorer = pipeline("text-classification", model=model, tokenizer=tokenizer)
        _SIMILARITY_PIPELINES[model_name] = scorer

    # A dict with "text"/"text_pair" is how the pipeline accepts a sentence
    # pair; passing two positional strings does not encode them as a pair.
    response = scorer({"text": text1, "text_pair": text2})
    # Depending on the transformers version, a single input comes back as
    # either one dict or a one-element list.
    if isinstance(response, list):
        response = response[0]
    return response['score']

# Example usage
text1 = "I love pizza."
text2 = "Pizza is my favorite food."
similarity = semantic_similarity(text1, text2)
print(similarity)
# Example output (illustrative only; the actual score depends on the model): 0.987654

3. 处理实体识别

from spacy import displacy

def extract_entities(text):
    """Return the named entities spaCy finds in *text*.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entry
        in ``doc.ents``.
    """
    # Imported here because the surrounding snippet only imports
    # ``displacy``, which leaves the name ``spacy`` unbound.
    import spacy

    nlp = spacy.load("en_core_web_sm")
    return [(ent.text, ent.label_) for ent in nlp(text).ents]

# Example usage
text = "Apple Inc. is planning to open a new store in New York."
entities = extract_entities(text)
print(entities)
# Expected output (may vary with the spaCy model version): [('Apple Inc.', 'ORG'), ('New York', 'GPE')]

4. 处理情感分析

from transformers import pipeline

def sentiment_analysis(text):
    """Classify the sentiment of *text*.

    Builds the default ``"sentiment-analysis"`` pipeline, runs it on the
    text and returns the first prediction.

    Args:
        text: The string to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result.
    """
    top_prediction = pipeline("sentiment-analysis")(text)[0]
    return top_prediction['label'], top_prediction['score']

# Example usage
text = "The movie was really good!"
label, score = sentiment_analysis(text)
print(label, score)
# Example output (illustrative only; the exact score depends on the model): POSITIVE 0.987654

5. 处理语法和句法分析

import nltk

def parse_sentence(text):
    """Tokenize, POS-tag and named-entity-chunk *text* with NLTK.

    Args:
        text: The sentence to process.

    Returns:
        The result of ``nltk.chunk.ne_chunk`` applied to the tagged tokens.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    return nltk.chunk.ne_chunk(tagged_tokens)

# Example usage
text = "John saw the cat with a telescope."
entities = parse_sentence(text)
print(entities)
# Approximate output, shown on one line (exact labels depend on the installed NLTK models):
# (S (PERSON John/NNP) saw/VBD the/DT cat/NN with/IN a/DT telescope/NN ./.)

这些上下文相关实用函数能够帮助我们更好地处理文本数据，从而进行更深入的文本分析、理解和处理。通过利用这些函数和相关的自然语言处理工具库，我们可以更高效地开发出一些强大的文本处理应用。