Talk:Authority List

From Brede Wiki
Jump to: navigation, search

Convert the PDF to plain text (pdftotext writes series_2012-og-2013-3.txt next to the PDF):

pdftotext -layout series_2012-og-2013-3.pdf

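Python script that reads the converted text file and prints every line that is neither skipped (blank line, page number, list heading) nor matched by the record pattern: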

import codecs
import re
 
# Report every line of the pdftotext output that is neither a known
# non-record line nor matched by the record pattern below.  Lines are
# printed unchanged (including their trailing newline).
filename = "series_2012-og-2013-3.txt"

# Lines that are skipped silently.  Raw strings are used so that "\s" and
# "\d" reach the regex engine as written instead of being treated as
# (invalid) string escape sequences.
SKIP_PATTERNS = [
    # Empty or whitespace-only line.
    re.compile(r"^\s*$"),
    # Indented "Side <digits>" line (presumably the page number footer).
    re.compile(r"^\s+Side \d+\s*$"),
    # List heading preceded by exactly one arbitrary character
    # (presumably the form feed pdftotext emits at page breaks -- unconfirmed).
    re.compile(r'^.\s+Autoritetsliste for serier.*'),
]

# Expected layout of a record line.  The comments describe only the shape of
# each column; what the columns mean is not stated in this script.
RECORD_PATTERN = re.compile(
    r"""^\s?(\d+)\s+                            # integer, at most one leading whitespace char
                                     (\S+(?:\s\S+)*)\s+          # text: tokens joined by single whitespace chars
                                     (\d+)\s+                    # integer
                                     (?:(\d{4}-\d{3}[\dX])\s+)?  # optional dddd-ddd[dX] code (looks like an ISSN)
                                     (\w+)\s+                    # single word
                                     (\S+(?:\s\S+)*)\s+          # text: tokens joined by single whitespace chars
                                     (\d)                        # single digit
                                 """,
    flags=re.VERBOSE | re.UNICODE,
)

# "lines" rather than "file": avoids shadowing the Python 2 builtin.
with codecs.open(filename, encoding="UTF-8") as lines:
    for line in lines:
        if any(pattern.match(line) for pattern in SKIP_PATTERNS):
            continue
        # The pattern is anchored with "^", so match() is equivalent to the
        # truthiness of findall() and stops at the first hit.
        if not RECORD_PATTERN.match(line):
            print(line)
Personal tools