# Two example IsafEvent records written as a Python list literal.
# NOTE(review): `IsafEvent` and `datetime` are not defined or imported in the
# visible part of this file — presumably this is printed output of the dataset
# rather than executable code; confirm before relying on it.
[
# Record '5': a single Taliban leader killed (min_killed=1, min_leaders_killed=1).
IsafEvent(
name='5',
text='2013-01-S-025\n\nKABUL, Afghanistan (Jan. 25, 2013)\nDuring a security operation in Andar district, Ghazni province, yesterday, an Afghan and coalition force killed the Taliban leader, Alaudin. Alaudin oversaw a group of insurgents responsible for conducting remote-controlled improvised explosive device and small-arms fire attacks against Afghan and coalition forces. Prior to his death, Alaudin was planning attacks against Afghan National Police in Ghazni province.',
start_date=datetime.date(2013, 1, 24),  # one day before the Jan. 25 release date ("yesterday" in the text)
event_type={'insurgentskilled'},
province={'ghazni'},
target_group={'taliban'},
min_killed=1,
min_captured=0,
killq=True,
captureq=False,
killcaptureraid=False,
airstrike=False,
noshotsfired=False,
min_leaders_killed=1,
min_leaders_captured=0,
predictions={}  # empty here: no model predictions attached to this record
),
# Record '2': "numerous" suspected insurgents detained, recorded as min_captured=4.
IsafEvent(
name='2',
text='2011-11-S-034\nISAF Joint Command - Afghanistan\nFor Immediate Release\n\nKABUL, Afghanistan (Nov. 20, 2011)\nA coalition security force detained numerous suspected insurgents during an operation in Marjeh district, Helmand province, yesterday. The force conducted the operation after receiving information that a group of insurgents were at a compound in the area. After calling for the men inside to come out peacefully, the insurgents emerged and were detained without incident.',
start_date=datetime.date(2011, 11, 19),  # one day before the Nov. 20 release date ("yesterday" in the text)
event_type={'detention'},
province={'helmand'},
target_group={''},  # a set holding one empty string, not an empty set
min_killed=0,
min_captured=4,
killq=False,
captureq=True,
killcaptureraid=True,
airstrike=False,
noshotsfired=False,
min_leaders_killed=0,
min_leaders_captured=0,
predictions={}  # empty here: no model predictions attached to this record
)
]
from openai import OpenAI
from rich import print
import json
import os
def query_openai(article_text: str, model: str) -> str:
    """Ask an OpenAI chat model to extract structured event data from a press release.

    The press release text is embedded in a user prompt that lists the fields
    to extract, the annotation conventions for vague quantities, and one
    worked example. A system prompt supplies the allowed values for event
    types, provinces and target groups. The request asks for a JSON object
    response.

    Args:
        article_text: Text of the press release; interpolated verbatim into
            the prompt.
        model: Model identifier passed to the chat completions endpoint.

    Returns:
        The message content of the first choice in the response.
        NOTE(review): the SDK types this content as optional, so ``None`` may
        be returned despite the ``str`` annotation — callers should check.
    """
    query = (
        f"The following is a press release issued by ISAF (formerly operating in Afghanistan):\n{article_text}\n\n"
        "## Extraction request\n"
        "Please extract the following information from the press release:\n"
        "- The name of the event (summarising the event / text as a headline)\n"
        "- The start date of the event\n"
        "- The event type(s)\n"
        "- The province(s) in which the event occurred\n"
        "- The target group(s) of the event\n"
        "- The minimum number of people killed during the event\n"
        "- The minimum number of people captured during the event\n"
        "- Whether someone was killed or not during the event\n"
        "- Whether someone was captured or not during the event\n"
        "- Whether the event was a so-called 'kill-capture raid'\n"
        "- Whether an airstrike was used during the event\n"
        "- Whether no shots were fired during the event\n"
        "- The minimum number of leaders killed during the event\n"
        "- The minimum number of leaders captured during the event\n\n"
        "## Annotation notes:\n"
        "- A 'facilitator' is not a leader.\n"
        "- If a press release states that 'insurgents' were detained without further "
        "details, assign a minimum number of two detained. Interpret 'a couple' as "
        "two. Interpret 'several' as at least three, even though it may sometimes "
        "refer to seven or eight. Classify the terms 'a few', 'some', 'a group', 'a "
        "small group', and 'multiple' as denoting at least three, even if they "
        "sometimes refer to larger numbers. Choose the smaller number if no other "
        "information is available in the press release to come up with a minimally "
        "acceptable figure. Interpret 'numerous' and 'a handful' as at least four, "
        "and 'a large number' as at least five.\n\n"
        "## Example:\n"
        "Article text: 'ISAF Joint Command Evening Operational Update Feb. 19, 2011\nISAF Joint Command - "
        "Afghanistan\u20282011-02-S-143\u2028For Immediate Release \u2028\u2028KABUL, Afghanistan (Feb. 19)\u2028\u2028ISAF "
        "service members at a compound in Sangin district, Helmand province observed numerous insurgents north and south of "
        "their position talking on radios today. After gaining positive identification of the insurgent positions, the "
        "coalition troops engaged, killing several insurgents. Later, the ISAF troops observed more insurgents positioning "
        "in the area with weapons. After positive identification, coalition forces continued firing on the various insurgent "
        "positions, resulting in several more insurgents being killed.'\n\n"
        'Output: `{"name":"Several insurgents killed in '
        'Helmand","start_date":"2011-02-18","event_type":["insurgentskilled"],"province":["helmand"],"target_group":[""],"mi'
        'n_killed":6,"min_captured":0,"killq":true,"captureq":false,"killcaptureraid":false,"airstrike":false,"noshotsfired"'
        ':false,"min_leaders_killed":0,"min_leaders_captured":0}`'
    )

    # The system prompt carries the closed vocabularies the model should
    # choose from when filling event_type, province and target_group.
    system_prompt = (
        "You are an expert at identifying events in a press release. You are precise "
        "and always make sure you are correct, drawing inference from the text of the "
        "press release.\n\n You always return a JSON string with the following schema: "
        "## JSON Schema details\n"
        "Here is some of the schema for the JSON output string you "
        "should make use of: event_types = ['airstrike', 'detention', "
        "'captureandkill', 'insurgentskilled', 'exchangeoffire', 'civiliancasualty'], "
        "provinces = ['badakhshan', 'badghis', 'baghlan', 'balkh', 'bamyan', "
        "'day_kundi', 'farah', 'faryab', 'ghazni', 'ghor', 'helmand', 'herat', "
        "'jowzjan', 'kabul', 'kandahar', 'kapisa', 'khost', 'kunar', 'kunduz', "
        "'laghman', 'logar', 'nangarhar', 'nimroz', 'nuristan', 'paktya', 'paktika', "
        "'panjshir', 'parwan', 'samangan', 'sar_e_pul', 'takhar', 'uruzgan', "
        "'wardak', 'zabul'], target_groups = ['taliban', 'haqqani', 'criminals', "
        "'aq', 'hig', 'let', 'imu', 'judq', 'iju', 'hik', 'ttp', 'other']\n\n"
    )

    # set up the prediction harness; the API key is read from the environment
    # and a fresh client is created on every call.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.chat.completions.create(
        model=model,
        # Request a JSON object so the reply can be parsed downstream.
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        temperature=1,
    )
    return response.choices[0].message.content
# Count, per model, how many predictions are missing, i.e. are None or an
# empty string. The known model names are seeded with zero so that they
# always appear in the printed summary, even when nothing is missing.
missing_values = {
    "gpt-4o": 0,
    "gpt-4-turbo": 0,
    "gpt-3.5-turbo": 0,
    "tinyllama-templatefree": 0,
    "tinyllama-sharegpt": 0,
    "finetuned-openai-gpt-3.5-turbo-1106": 0,
    "finetuned-llama3-7b-32k-openpipe": 0,
    "mistral-lora-templatefree": 0,
    "finetuned-mistral-7b-optimised-openpipe": 0,
    "ft-solar-1-mini-chat-240612-predibase": 0,
}
for row in dataset_with_preds:
    predictions = row["predictions"]
    for model in predictions:
        prediction = predictions[model]
        if prediction is None or prediction == "":
            # Use .get() so a model that is absent from the seed table is
            # still counted instead of aborting the tally with a KeyError.
            missing_values[model] = missing_values.get(model, 0) + 1
print(missing_values)
标注说明:‘facilitator’(协助者)不算领导者。如果新闻稿只提到‘叛乱分子’(insurgents)被拘留而没有提供进一步细节,则将被拘留人数至少记为两人。将‘a couple’解释为两人。将‘several’解释为至少三人,尽管它有时可能指七八人。将‘a few’、‘some’、‘a group’、‘a small group’和‘multiple’归类为至少三人,即使它们有时指代更多人数。如果新闻稿中没有其他可用信息,请选择较小的数字,以得出一个最低可接受的数值。将‘numerous’和‘a handful’解释为至少四人,将‘a large number’解释为至少五人。