【Python2】檔案,xml操作
阿新 • • 發佈:2021-01-08
文章目錄
1.檔案
1.1 增:寫入檔案內容給文字檔案
def writeTextFile(filePath, fileContent, encoding='utf8'):
    """Write ``fileContent`` to ``filePath``, replacing any existing content.

    Args:
        filePath: Path of the text file to create or overwrite.
        fileContent: Text to write.
        encoding: Encoding used to write the file (default ``'utf8'``).
    """
    with open(filePath, 'w', encoding=encoding) as file:
        file.write(fileContent)
1.2 改:批量修改圖片大小
import os
from PIL import Image
def getFilePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Sub-directories are not searched.

    Args:
        dirPath: Directory to list.
        partOfFileName: Substring a file name must contain; the default ``''``
            matches every file.

    Returns:
        List of ``os.path.join(dirPath, name)`` for each matching file. Empty
        when ``dirPath`` cannot be listed.
    """
    # Only the first os.walk entry (the directory itself) is needed, so take it
    # lazily instead of walking the whole tree; the default avoids an error
    # when the directory does not exist.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [os.path.join(dirPath, k) for k in allFileName_list if partOfFileName in k]
def batchResizeImage(oldDirPath, newDirPath, height, width):
    """Resize every ``.jpg`` file directly inside ``oldDirPath`` and save it under ``newDirPath``.

    Args:
        oldDirPath: Directory holding the source images.
        newDirPath: Directory receiving the resized copies; created if missing.
        height: Target height in pixels.
        width: Target width in pixels.
    """
    # makedirs also creates missing parent folders and tolerates an existing one.
    os.makedirs(newDirPath, exist_ok=True)
    for jpgFilePath in getFilePathList(oldDirPath, '.jpg'):
        saveFilePath = os.path.join(newDirPath, os.path.basename(jpgFilePath))
        with Image.open(jpgFilePath) as image:
            # PIL sizes are (width, height). LANCZOS is the filter formerly
            # exposed as ANTIALIAS, which newer Pillow releases removed.
            resized_image = image.resize((width, height), Image.LANCZOS)
            resized_image.save(saveFilePath)
# Example run: resize the .jpg files of 'source_images' to 640x640 and save
# the results in 'train_images'.
oldDirPath = 'source_images'
newDirPath = 'train_images'
height = 640
width = 640
batchResizeImage(oldDirPath, newDirPath, height, width)
1.3 查:查詢資料夾中的檔案
import os
def getFileNameList(dirPath, partOfFileName=''):
    """Return names of the files directly inside ``dirPath`` that contain ``partOfFileName``.

    Sub-directories are not searched.

    Args:
        dirPath: Directory to list.
        partOfFileName: Substring a file name must contain; the default ``''``
            matches every file.

    Returns:
        List of bare file names (no directory part). Empty when ``dirPath``
        cannot be listed.
    """
    # Take only the top-level os.walk entry instead of walking the whole tree;
    # the default avoids an error when the directory does not exist.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [k for k in allFileName_list if partOfFileName in k]
def getFilePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Sub-directories are not searched.

    Args:
        dirPath: Directory to list.
        partOfFileName: Substring a file name must contain; the default ``''``
            matches every file.

    Returns:
        List of ``os.path.join(dirPath, name)`` for each matching file. Empty
        when ``dirPath`` cannot be listed.
    """
    # Take only the top-level os.walk entry instead of walking the whole tree;
    # the default avoids an error when the directory does not exist.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [os.path.join(dirPath, k) for k in allFileName_list if partOfFileName in k]
查:讀取檔案
def readTextFile(filePath, encoding='utf8'):
    """Return the whole content of the text file at ``filePath``.

    Args:
        filePath: Path of the file to read.
        encoding: Encoding used to decode the file (default ``'utf8'``).

    Returns:
        The file content as a single string.
    """
    with open(filePath, encoding=encoding) as file:
        return file.read()
查:搜尋資料夾路徑內含有指定內容的程式碼檔案
import os
# 傳入3個引數:資料夾路徑dirPath、指定內容partOfFileContent、程式碼檔案字尾名suffixOfFileName
def searchFileContent(dirPath, partOfFileContent, suffixOfFileName=''):
    """Find files under ``dirPath`` (recursively) whose text contains ``partOfFileContent``.

    Each matching path is printed as it is found.

    Args:
        dirPath: Root directory to search; a leading ``~`` is expanded.
        partOfFileContent: Text to look for inside each file.
        suffixOfFileName: File extension to restrict the search to, with or
            without the leading dot (``'.py'`` or ``'py'``). A file matches when
            the text after its last dot equals this value; a name without any
            dot counts as having an empty extension.

    Returns:
        List of paths of the matching files.

    Raises:
        UnicodeDecodeError: If a candidate file is not valid UTF-8.
    """
    dirPath = os.path.expanduser(dirPath)
    wantedExtension = suffixOfFileName.strip('.')
    result_list = []
    for currentDir, _dirNames, fileNames in os.walk(dirPath):
        for fileName in fileNames:
            # rpartition returns an empty separator when the name has no dot,
            # which would otherwise make the whole name look like an extension.
            _stem, dot, extension = fileName.rpartition('.')
            if not dot:
                extension = ''
            if extension != wantedExtension:
                continue
            filePath = os.path.join(currentDir, fileName)
            with open(filePath, encoding='utf8') as file:
                fileContent = file.read()
            if partOfFileContent in fileContent:
                print(filePath)
                result_list.append(filePath)
    return result_list
2.xml
2.1 labelimg_yolo_txt轉pascal voc_xml
from PIL import Image
import os
#讀取檔案尺寸
def ImgSize(image):
    """Return ``(width, height)`` in pixels of the image file at path ``image``."""
    # The context manager closes the underlying file once the size is read.
    with Image.open(image) as img:
        return img.width, img.height
#labelimg中yolo轉voc圖位轉換
#width,height就是原圖的w,h #xmin指中心點佔橫比例,xmax指中心點佔豎比例 #ymin指bbox佔整圖寬比例,ymax指bbox佔整圖高比例
def ScaleCovertor(width, height, xmin, xmax, ymin, ymax):
    """Convert one YOLO-style relative box into absolute corner coordinates.

    The parameter names do not match their meaning on input; they are kept
    unchanged for compatibility with existing callers.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        xmin: Box centre x as a fraction of the image width.
        xmax: Box centre y as a fraction of the image height.
        ymin: Box width as a fraction of the image width.
        ymax: Box height as a fraction of the image height.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of the box corners in pixels, each
        formatted as a string.
    """
    center_x = round(float(xmin * width))
    center_y = round(float(xmax * height))
    bbox_width = round(float(ymin * width))
    bbox_height = round(float(ymax * height))

    half_width = bbox_width / 2
    half_height = bbox_height / 2
    # int() truncates toward zero, so half-pixel results are cut, not rounded.
    left = str(int(center_x - half_width))
    top = str(int(center_y - half_height))
    right = str(int(center_x + half_width))
    bottom = str(int(center_y + half_height))
    return left, top, right, bottom
def Main(filepath, image_dir='C:/Users/lenovo/Desktop/yuantu',
         xml_dir='C:/Users/lenovo/Desktop/annotation2/xml'):
    """Convert the YOLO ``.txt`` label files in ``filepath`` to Pascal VOC style ``.xml`` files.

    For every ``NAME.txt`` the image ``NAME.jpg`` is read from ``image_dir`` to
    obtain its size, and ``NAME.xml`` is written into ``xml_dir``.

    Args:
        filepath: Folder containing the ``.txt`` files to convert.
        image_dir: Folder containing the matching ``.jpg`` images.
        xml_dir: Folder receiving the generated ``.xml`` files; created if
            missing.

    Raises:
        KeyError: If a row's class id is neither ``0`` nor ``1``.
        ValueError: If a non-blank row has fewer than five fields or a field
            is not numeric.
    """
    # XML templates; the {} placeholders are filled with str.format below.
    xml_head = '''
<annotation>
<folder>Desktop</folder>
<filename>{}</filename>
<path>unknonw</path>
<source>
<database>unknow</database>
</source>
<size>
<width>{}</width>
<height>{}</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
'''
    xml_obj = '''
<object>
<name>{}</name>
<pose>no</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>{}</xmin>
<ymin>{}</ymin>
<xmax>{}</xmax>
<ymax>{}</ymax>
</bndbox>
</object>
'''
    xml_end = '''
</annotation>'''
    # Mapping from the class id in the txt file to the label written to xml.
    label = {1: 'obstacle', 0: 'people'}

    os.makedirs(xml_dir, exist_ok=True)
    txt_names = [name for name in os.listdir(filepath) if name.lower().endswith('.txt')]
    for counter, filename in enumerate(txt_names, start=1):
        print('Processing:->>', filename, 'Number %s' % counter)
        # splitext keeps dots that are part of the base name.
        stem = os.path.splitext(filename)[0]

        # One row per box: class id, centre x, centre y, box width, box height.
        with open(os.path.join(filepath, filename), 'r') as label_file:
            rows = [line.split() for line in label_file if line.strip()]

        w, h = ImgSize(os.path.join(image_dir, stem + '.jpg'))
        head = xml_head.format(stem + '.jpg', str(w), str(h))

        obj_parts = []
        for row in rows:
            Class, XMin, XMax, YMin, YMax = row[:5]
            xmin, ymin, xmax, ymax = ScaleCovertor(
                w, h, float(XMin), float(XMax), float(YMin), float(YMax))
            obj_parts.append(xml_obj.format(label[int(Class)], xmin, ymin, xmax, ymax))

        # Joining with the folder puts the file inside xml_dir; plain string
        # concatenation produced "...xmlNAME.xml" next to that folder instead.
        with open(os.path.join(xml_dir, stem + '.xml'), 'w') as xmw:
            xmw.write(head + ''.join(obj_parts) + xml_end)
# Convert every label file found in the txt folder below.
Main('C:/Users/lenovo/Desktop/annotation2/txt') #folder holding the txt files
#驗證轉的對錯
# Visual check of a conversion: draw one box on top of its source image.
import matplotlib.pyplot as plt
# Aliased as mpimg (not Image) so the PIL `Image` class imported earlier in
# this file is not shadowed by matplotlib's image module.
import matplotlib.image as mpimg
# `%matplotlib inline` is an IPython/Jupyter magic, not Python syntax; run it
# in a notebook cell if inline figures are wanted.
img = mpimg.imread('/Users/Desktop/annotation2/test/yuantu/'+'20190721062948_000394_cc8cdaa5ee38.jpg')
x1, y1, x2, y2 = 1344, 495, 1722, 1080  # box corners to verify, picked by hand
plt.gca().add_patch(
    plt.Rectangle(xy=(x1, y1), width=x2 - x1, height=y2 - y1,
                  fill=False, edgecolor='red', linewidth=2)
)
plt.imshow(img)
plt.show()  # needed when not running inline
2.2 刪除 w label
import re
import os
# Copy each xml file of `rawfolder` that contains <name>w</name> objects into
# `newfolder` with those <object> elements removed. Files without any such
# object are not written to `newfolder`.
rawfolder = '123'    # folder holding the source xml files
newfolder = '33333'  # folder receiving the rewritten xml files
os.makedirs(newfolder, exist_ok=True)
for i in os.listdir(rawfolder):
    print(i)
    with open(rawfolder + '/' + i, 'r') as r:
        content = r.readlines()

    final = []    # lines kept for the output file
    block = None  # lines of the <object> being read, or None outside one
    removed = 0   # number of dropped <object> elements
    for c, j in enumerate(content):
        if block is None and '<object>' in j:
            block = []
        if block is None:
            final.append(j)
            continue
        block.append(j)
        if '<name>w</name>' in j:
            print(j, '下標-》', c)  # c is the 0-based line index of the match
        if '</object>' in j:
            # Decide per complete element, so objects of any length and any
            # number of 'w' objects per file are handled.
            if any('<name>w</name>' in k for k in block):
                removed += 1
            else:
                final.extend(block)
            block = None
    if block is not None:
        # Unterminated <object>: keep its lines rather than lose data.
        final.extend(block)

    if removed:
        # Mode 'w' with a single write keeps reruns from appending duplicates.
        with open(newfolder + '/' + i, 'w') as w:
            w.writelines(final)
        for x in final:
            print(x)
2.3 檢查不是people和obstacle的label
# 檢查不是people和obstacle的label
import re
import os
# Print every <name> line (followed by its file name) whose label is not
# exactly 'people' or 'obstacle'.
rawfolder = '123'
#newfolder='33333'
allowed_labels = ('people', 'obstacle')
for i in os.listdir(rawfolder):
    with open(rawfolder + '/' + i, 'r') as r:
        content = r.readlines()
    for j in content:
        if '<name>' not in j:
            continue
        # Compare the full tag text: a substring test would accept labels
        # such as 'people2' or 'obstacles'.
        match = re.search(r'<name>(.*?)</name>', j)
        if match is None or match.group(1).strip() not in allowed_labels:
            print(j)
            print(i)
2.4 讀取指定字尾
import os
def get_filePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Sub-directories are not searched. The match is a case-sensitive substring
    test, so ``'.JPG'`` does not match ``'x.jpg'``.

    Args:
        dirPath: Directory to list.
        partOfFileName: Substring a file name must contain; the default ``''``
            matches every file.

    Returns:
        List of ``os.path.join(dirPath, name)`` for each matching file. Empty
        when ``dirPath`` cannot be listed.
    """
    # The default tuple avoids StopIteration when os.walk yields nothing
    # because the directory does not exist.
    all_fileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    fileName_list = [k for k in all_fileName_list if partOfFileName in k]
    return [os.path.join(dirPath, k) for k in fileName_list]
# Example: collect the paths of the files in this folder whose name contains '.JPG'.
dirPath='C:/Users/lenovo/Desktop/lian'
a=get_filePathList(dirPath,'.JPG')
# Bare expression: echoes the list only in an interactive session or notebook.
a
#print(len(a))
2.5 檢查是否有圖片漏標,並刪除漏標圖片
def delete_file(filePath):
    """Print a message telling whether ``filePath`` exists; nothing is deleted.

    An existing path is reported as needing manual deletion, and a missing
    path is reported as not existing.

    Args:
        filePath: Path to report on.
    """
    if os.path.exists(filePath):
        print('%s 這個路徑的檔案需手動刪除' % filePath)
    else:
        print('%s 這個檔案路徑不存在,請檢查一下' % filePath)
def check_1(dirPath, suffix):
    """Report image files in ``dirPath`` that have no annotation ``.xml`` with the same base name.

    Each unannotated image path is passed to ``delete_file``.

    Args:
        dirPath: Folder holding both the images and the ``.xml`` files.
        suffix: Text the image file names must contain, e.g. ``'.JPG'``.
    """
    xmlFilePath_list = get_filePathList(dirPath, '.xml')
    xmlFilePathPrefix_set = {os.path.splitext(k)[0] for k in xmlFilePath_list}

    # Map each extension-less path to the real image path, so the reported path
    # carries the file's actual extension (any length), not a fixed '.JPG'.
    imageFilePath_list = get_filePathList(dirPath, suffix)
    imageFilePath_by_prefix = {os.path.splitext(k)[0]: k for k in imageFilePath_list}

    redundant_prefix_set = set(imageFilePath_by_prefix) - xmlFilePathPrefix_set
    for prefix in sorted(redundant_prefix_set):
        delete_file(imageFilePath_by_prefix[prefix])
# Example: check the .JPG files of this folder against its .xml files.
dirPath='C:/Users/lenovo/Desktop/lx'
check_1(dirPath,'.JPG')
2.6 檢測標記的box是否超過圖片的邊界,若有則顯示刪除與box相關的xml檔案和圖片檔案
import xml.etree.ElementTree as ET
from PIL import Image
def check_2(dirPath, suffix):
    """Check that every annotated box in ``dirPath`` lies inside its image.

    For each ``.xml`` file the image with the same base name and the given
    suffix is opened to read its size. When a box is empty, inverted or
    crosses an image edge, the xml path and the image path are passed to
    ``delete_file`` and the rest of that file is skipped. A success message is
    printed when no file was reported.

    Args:
        dirPath: Folder holding both the ``.xml`` files and the images.
        suffix: Image file extension, with or without the leading dot.
    """
    allFileCorrect = True
    for xmlFilePath in get_filePathList(dirPath, '.xml'):
        imageFilePath = xmlFilePath[:-4] + '.' + suffix.strip('.')
        # Close the image file as soon as its size is known.
        with Image.open(imageFilePath) as image:
            width, height = image.size

        # ET.parse reads the file as bytes and honours its XML declaration.
        root = ET.parse(xmlFilePath).getroot()
        for object_item in root.findall('object'):
            bndbox = object_item.find('bndbox')
            xmin, ymin, xmax, ymax = (
                int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
            # Negative minima are outside the image too, so they are checked
            # together with the maxima.
            if 0 <= xmin < xmax <= width and 0 <= ymin < ymax <= height:
                continue
            delete_file(xmlFilePath)
            delete_file(imageFilePath)
            allFileCorrect = False
            break  # one bad box is enough to report this file
    if allFileCorrect:
        print('祝賀你! 已經通過檢驗,所有xml檔案中的標註框都沒有越界')
# Example: run the box check on a folder.
dirPath='C:/Users/lenovo/Desktop/lx' #the lx folder holds the .xml and .JPG files together
check_2(dirPath,'.JPG')#author's note: the suffix must be '.JPG' or left empty - TODO confirm what an empty suffix is meant to do
2.7 檢查xmin<0…,並修改xmin…
#coding=utf-8
import os
import shutil
import random
from xml.etree.ElementTree import ElementTree,Element
import cv2
def read_xml(in_path):
    """Read and parse an XML file.

    Args:
        in_path: Path of the XML file.

    Returns:
        The parsed document as an ``ElementTree``.
    """
    tree = ElementTree()
    tree.parse(in_path)
    return tree
def check(url="C:/Users/lenovo/Desktop/source/xml_sum"):
    """Print the name of every xml file in ``url`` that has a suspicious annotation.

    A file name is printed when the file has no ``<size>`` or no ``<object>``
    element, when an object has no ``<bndbox>``, when a box has
    ``xmin <= 0``, ``ymin <= 0``, ``xmin >= xmax`` or ``ymin >= ymax``, or
    when ``xmax``/``ymax`` exceed the recorded image size. A file can be
    printed more than once. No output means every file passed.

    Args:
        url: Folder that contains only xml files.
    """
    for item in os.listdir(url):
        root = read_xml(url + "/" + item).getroot()
        objects = root.findall("object")
        size = root.find("size")
        # findall returns a list (never None), so test for emptiness.
        if size is None or not objects:
            print(item)
            continue
        width = int(size.find("width").text)
        height = int(size.find("height").text)
        for it in objects:
            bndbox = it.find("bndbox")
            if bndbox is None:
                print(item)
                continue  # nothing to measure on this object
            xmin = int(bndbox.find("xmin").text)
            xmax = int(bndbox.find("xmax").text)
            ymin = int(bndbox.find("ymin").text)
            ymax = int(bndbox.find("ymax").text)
            if xmin <= 0 or xmin >= xmax or ymin <= 0 or ymin >= ymax:
                print(item)
            if xmax > width or ymax > height:
                print(item)


if __name__ == '__main__':
    check()  # prints bare file names such as 123111.xml; silence means all good
import xml.etree.ElementTree as ET
def generateNewXmlFile(old_xmlFilePath, new_xmlFilePath):
    """Write a copy of an annotation xml with every box coordinate increased by 1.

    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` of each ``<object>``'s
    ``<bndbox>`` are incremented; the source file is left untouched.

    Args:
        old_xmlFilePath: Path of the xml file to read.
        new_xmlFilePath: Path of the xml file to write.
    """
    # ET.parse reads bytes and honours the XML declaration instead of decoding
    # with the platform's default text encoding.
    tree = ET.parse(old_xmlFilePath)
    for object_item in tree.getroot().findall('object'):
        bndbox = object_item.find('bndbox')
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            coordinate = bndbox.find(tag)
            coordinate.text = str(int(coordinate.text) + 1)
    tree.write(new_xmlFilePath)
# Source and destination folders passed to batch_modify_xml further down.
old_dirPath ='C:/Users/lenovo/Desktop/999/8'
new_dirPath ='C:/Users/lenovo/Desktop/999/9'
def batch_modify_xml(old_dirPath, new_dirPath):
    """Run ``generateNewXmlFile`` on every xml file of ``old_dirPath``.

    Each output file keeps the name of its source file and is written to
    ``new_dirPath``.

    Args:
        old_dirPath: Folder holding the xml files to convert.
        new_dirPath: Folder receiving the converted files; created if missing.
    """
    # Writing would fail if the destination folder did not exist yet.
    os.makedirs(new_dirPath, exist_ok=True)
    for xmlFilePath in get_filePathList(old_dirPath, '.xml'):
        xmlFileName = os.path.basename(xmlFilePath)
        new_xmlFilePath = os.path.join(new_dirPath, xmlFileName)
        generateNewXmlFile(xmlFilePath, new_xmlFilePath)
# Convert every xml file of old_dirPath into new_dirPath.
batch_modify_xml(old_dirPath, new_dirPath)
2.8 讀取classname
def get_classNameList(txtFilePath):
    """Return the sorted class names listed one per line in a UTF-8 text file.

    Surrounding whitespace is stripped from every line and blank lines are
    ignored.

    Args:
        txtFilePath: Path of the text file.

    Returns:
        List of class names in ascending order.
    """
    with open(txtFilePath, 'r', encoding='utf8') as file:
        stripped_lines = [line.strip() for line in file]
    return sorted(name for name in stripped_lines if name)
# Example: read the class names from a text file.
txtFilePath='C:/Users/lenovo/Desktop/labelImg/data/predefined_classes -outofstock.txt'
get_classNameList(txtFilePath)  # result is shown only in an interactive session
import os
# os.path.split separates a path into (directory part, last component).
pathnoname,name=os.path.split("E:/lpthw/zedshaw/ex19.py")
print(pathnoname)
print(name)
# Add an entry to the module search path (sys.path); this does not change any
# operating-system environment variable.
import sys
sys.path.append('')
2.9 檢查trainval.txt
import cv2
from os import listdir
from os.path import isfile,isdir,join
# Check that every image named in ./trainval.txt can be read from ./img3/.
trainval_list = []
with open('./trainval.txt', 'r') as f:
    for line in f:
        line = line.strip('\n')
        if not line:
            continue  # a blank line would otherwise become the bogus name '.jpg'
        trainval_list.append(line + '.jpg')
print(trainval_list)
for i in trainval_list:
    img_path = '{}{}'.format('./img3/', i)
    img = cv2.imread(img_path)
    # cv2.imread returns None rather than raising when the file is missing or
    # unreadable, so test for None instead of catching every exception.
    if img is None:
        print('fail read:' + img_path)
        continue
    print(img.shape)
B站/知乎/微信公眾號:碼農程式設計錄