"""
Named Entity Recognition (NER)
Generated by Eden via recursive self-improvement
2025-10-28 12:34:50.403786
"""

from typing import List, Dict

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

class NamedEntityRecognizer:
    """Extract named entities from text with a token-classification model.

    The recognizer tokenizes the input, takes the highest-scoring label for
    every token and groups consecutive tokens into entity spans.
    """

    DEFAULT_MODEL_NAME = "dbmdz/bert-base-cased-finetuned-conll03-german"

    # Marker that BERT WordPiece tokenizers put in front of word-continuation
    # pieces (e.g. "Obama" -> "Ob", "##ama").
    _SUBWORD_PREFIX = "##"

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME):
        """Load the tokenizer and model.

        Args:
            model_name: Name or path passed to ``from_pretrained`` for both
                the tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)

    def extract_entities(self, text: str) -> List[Dict[str, str]]:
        """Return the entities found in *text*.

        Args:
            text: The raw input string.

        Returns:
            A list of ``{"text": ..., "label": ...}`` dicts in order of
            appearance. ``text`` is the space-joined words of the entity and
            ``label`` is the label predicted for the entity's first token,
            exactly as it appears in ``model.config.id2label``.
        """
        inputs = self.tokenizer(text, return_tensors="pt")
        # Inference only: do not record the forward pass for autograd.
        with torch.no_grad():
            outputs = self.model(**inputs)
        prediction = outputs.logits.argmax(dim=2).tolist()[0]

        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        id2label = self.model.config.id2label
        labels = [id2label[pred] for pred in prediction]
        special_tokens = set(self.tokenizer.all_special_tokens)

        return self._group_entities(tokens, labels, special_tokens)

    @staticmethod
    def _split_label(label: str):
        """Split a ``B-XXX`` / ``I-XXX`` label into ``(prefix, entity_type)``.

        Labels without a ``B-``/``I-`` prefix are returned as ``("", label)``.
        """
        prefix, sep, entity_type = label.partition("-")
        if sep and prefix in ("B", "I"):
            return prefix, entity_type
        return "", label

    @classmethod
    def _group_entities(cls, tokens, labels, special_tokens) -> List[Dict[str, str]]:
        """Merge per-token labels into a list of entity dicts."""
        entities: List[Dict[str, str]] = []
        current = None       # entity currently being built, if any
        current_type = None  # its label with any B-/I- prefix removed

        for token, label in zip(tokens, labels):
            # [CLS], [SEP], ... are not part of the text.
            if token in special_tokens:
                continue

            # A continuation piece belongs to the word that precedes it: glue
            # it onto the open entity, or drop it when that word was not an
            # entity. The piece's own predicted label is ignored so a word is
            # never split in the middle.
            if token.startswith(cls._SUBWORD_PREFIX):
                if current is not None:
                    current["text"] += token[len(cls._SUBWORD_PREFIX):]
                continue

            if label == "O":
                # Close the open entity exactly once.
                if current is not None:
                    entities.append(current)
                    current = None
                continue

            prefix, entity_type = cls._split_label(label)
            # "B-" always opens a new entity; anything else continues the
            # open entity when the entity type matches.
            if current is not None and prefix != "B" and entity_type == current_type:
                current["text"] += " " + token
                continue

            if current is not None:
                entities.append(current)
            current = {"text": token, "label": label}
            current_type = entity_type

        # Add the last entity if any
        if current is not None:
            entities.append(current)

        return entities

# Example usage. Guarded so that importing this module does not load the
# model or run inference; running the file as a script behaves as before.
if __name__ == "__main__":
    recognizer = NamedEntityRecognizer()
    text = "Barack Obama was born in Hawaii."
    entities = recognizer.extract_entities(text)
    print(entities)