XML to PostgreSQL with Python
My code looks like :
#!/usr/bin/python
import codecs
import re
import psycopg2
import sys
import os
import time
import shutil
import csv
# Code for converting from iso-8859-2 to utf-8
BLOCKSIZE = 1048576
with codecs.open('DVBSNOOP/epg_dvb_snoop.txt', "r", "iso-8859-2") as
sourceFile:
with codecs.open('DVBSNOOP/epg_slo_sort.txt', "w", "utf-8") as
targetFile:
while True:
contents = sourceFile.read(BLOCKSIZE)
if not contents:
break
targetFile.write(contents)
break
# Code for regex remove specific string from text
input_file = open('epg_slo_sort.txt', "r") # read
output_file = open('epg_slo_xml.txt', "w") # write
for line in input_file:
line = re.sub(r"\d{8}:|\d{7}:|\d{6}:|\d{5}:|\d{4}:","", line) # remove
before nubers
line = re.sub(r"^//","",line) # remove //
line = re.sub(r"\([^)]*\)","",line) # remove ()
line = re.sub(r"= --> refers to PMT program_number","",line) # remove
= --> refers to PMT program_number
line = re.sub(r"-- Charset: ISO/IEC 8859 special table","",line) #
remove -- Charset: ISO/IEC 8859 special table
line = re.sub(r"[\[]\]","",line) # remove []
line = re.sub(r"Duration: \d{1}[^0-9]\d{7}","Duration",line) #remove
Duration: 0x0021000
line = re.sub(r"Start_time: \d{1}\w+","Start ",line) #remove
Start_time: 0xdce9115000
line = line.replace('"..',"").replace('"',"").replace(" . .
.","").replace("-","") # remove ".. , "" ....
line = line.replace ("Service_ID:","Program") # replace Service_ID to
Program
line = line.replace("event_name:","Title") # Replace event_name to Title
line = line.replace("[=","").replace("]","") # replace [= to blank
output_file.write(line) # write to file
# Code for output orginal file with time-stamp copy of epg_slo_xml.txt
current_time = time.strftime("_%d.%m.%y_%H:%M", time.localtime()) #
Get local time
shutil.copy('SORT/epg_slo_xml.txt','SORT/BACKUP/epg_slo_xml%s.txt' %
current_time) # Copy file to SORT/BACKUP/ file+date+time
Create XML file code:
# Code for creating XML document
output = open('epg.xml','w')
print >> output, '<?xml version="1.0" encoding="utf-8" ?>'
print >> output, '<epg>'
with open('SORT/epg_slo_xml.txt','r') as txt:
for line in txt:
if 'Program' in line:
output.write('<item>'+'<program>'+line+'</program>')
if 'Start' in line:
output.write('<start>'+line+'</start>')
if 'Duration' in line:
output.write('<duration>'+line+'</duration>')
if 'Title' in line :
output.write('<title>'+line+'</title>'+'</item>')
print >> output , '</epg>'
And the question part:
# Connection to Database
def main():
#Define our connection string
conn_string = ("host=localhost dbname=epg user=car password=pass")
# get a connection, if a connect cannot be made an exception will be
raised here
conn = psycopg2.connect(conn_string)
# conn.cursor will return a cursor object
cursor = conn.cursor()
print "Connected!\n"
if __name__ == "__main__":
main()
The question is how can I process my XML file, I mean open it process
every line with tags and upload into PostgrSQL table with all informations
from XML.
And my XML file looks like :
<?xml version="1.0" encoding="utf-8" ?>
<epg>
<item>
<program> Program 10 </program>
<start> Start 20130918 11:50:00 </start>
<duration> Duration 02:10:00 </duration>
<title> Title Obvestila </title>
</item>
</epg>
Thx, for reading this article, if you can give me any solution or code,
for processing xml file to postgresql.
No comments:
Post a Comment