1. 程式人生 > 其它 >【Python2】檔案,xml操作

【Python2】檔案,xml操作

技術標籤:Python、列表、xml、python

文章目錄


1.檔案

1.1 增:寫入檔案內容給文字檔案

def writeTextFile(filePath, fileContent, encoding='utf8'):
    """Create (or overwrite) the text file at ``filePath`` with ``fileContent``.

    The file is opened in write mode and encoded with ``encoding``
    (``'utf8'`` unless overridden). Nothing is returned.
    """
    with open(filePath, mode='w', encoding=encoding) as textStream:
        textStream.write(fileContent)

1.2 改:批量修改圖片大小

import os
from PIL import Image

def getFilePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Sub-directories are not searched. A directory that cannot be walked
    (for example one that does not exist) yields an empty list.
    """
    # Only the first os.walk() entry (the top-level directory) is needed,
    # so take it lazily instead of walking the whole tree.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [os.path.join(dirPath, k) for k in allFileName_list if partOfFileName in k]


def batchResizeImage(oldDirPath, newDirPath, height, width):
    """Resize every ``.jpg`` file found directly in ``oldDirPath`` and save it under ``newDirPath``.

    Each output file keeps the name of its source file. ``newDirPath`` is
    created when it does not exist yet.

    height: target height in pixels.
    width: target width in pixels.
    """
    os.makedirs(newDirPath, exist_ok=True)
    for jpgFilePath in getFilePathList(oldDirPath, '.jpg'):
        # Image.open() keeps the file open until closed, hence the context manager.
        with Image.open(jpgFilePath) as image:
            # resize() expects (width, height). LANCZOS is the filter that the
            # removed Image.ANTIALIAS constant used to alias.
            resized_image = image.resize((width, height), Image.LANCZOS)
        jpgFileName = os.path.split(jpgFilePath)[1]
        saveFilePath = os.path.join(newDirPath, jpgFileName)
        resized_image.save(saveFilePath)


oldDirPath = 'source_images'
newDirPath = 'train_images'
height = 640
width = 640
batchResizeImage(oldDirPath, newDirPath, height, width)

1.3 查:查詢資料夾中的檔案

import os

def getFileNameList(dirPath, partOfFileName=''):
    """Return names of the files directly inside ``dirPath`` that contain ``partOfFileName``.

    Sub-directories are not searched, and the default ``''`` matches every
    file. A directory that cannot be walked (for example one that does not
    exist) yields an empty list.
    """
    # Only the first os.walk() entry (the top-level directory) is needed,
    # so take it lazily instead of walking the whole tree.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [k for k in allFileName_list if partOfFileName in k]
    
def getFilePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Each path is ``dirPath`` joined with the file name. Sub-directories are
    not searched, and the default ``''`` matches every file. A directory
    that cannot be walked (for example one that does not exist) yields an
    empty list.
    """
    # Only the first os.walk() entry (the top-level directory) is needed,
    # so take it lazily instead of walking the whole tree.
    allFileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    return [os.path.join(dirPath, k) for k in allFileName_list if partOfFileName in k]

1.4 查:讀取檔案

def readTextFile(filePath, encoding='utf8'):
    """Return the whole content of the text file at ``filePath``.

    The file is decoded with ``encoding`` (``'utf8'`` unless overridden).
    """
    with open(filePath, mode='r', encoding=encoding) as textStream:
        wholeText = textStream.read()
    return wholeText

1.5 查:搜尋資料夾路徑內含有指定內容的程式碼檔案

import os
# Takes 3 arguments: the directory dirPath, the text to look for
# partOfFileContent, and the code-file extension suffixOfFileName.
def searchFileContent(dirPath, partOfFileContent, suffixOfFileName=''):
    """Recursively find files under ``dirPath`` whose text contains ``partOfFileContent``.

    dirPath: directory to search; a leading ``~`` is expanded.
    partOfFileContent: text that must occur in a file for it to be reported.
    suffixOfFileName: extension to restrict the search to, with or without
        the leading dot (``'.py'`` and ``'py'`` are equivalent). The default
        ``''`` searches every file.

    Every matching path is printed and the list of matching paths is returned.
    Files are read as UTF-8; bytes that cannot be decoded are replaced so that
    a single odd file does not abort the search.
    """
    dirPath = os.path.expanduser(dirPath)
    wantedSuffix = suffixOfFileName.strip('.')
    result_list = []
    for folder, _subdirs, fileNames in os.walk(dirPath):
        for fileName in fileNames:
            if wantedSuffix:
                # rpartition() gives an empty separator for names without a
                # dot, so such files are skipped instead of raising IndexError.
                _stem, dot, extension = fileName.rpartition('.')
                if not dot or extension != wantedSuffix:
                    continue
            filePath = os.path.join(folder, fileName)
            with open(filePath, encoding='utf8', errors='replace') as file:
                fileContent = file.read()
            if partOfFileContent in fileContent:
                print(filePath)
                result_list.append(filePath)
    return result_list

2.xml

2.1 labelimg_yolo_txt轉pascal voc_xml

from PIL import Image
import os
    
# Read the pixel size of an image file.
def ImgSize(image):
    """Return ``(width, height)`` of the image file at path ``image``."""
    # Image.open() keeps the underlying file open; the context manager
    # releases it as soon as the size has been read.
    with Image.open(image) as img:
        w,h =  img.width,img.height
    return w,h

# Convert a labelImg YOLO box into Pascal VOC corner coordinates.
# width, height: size of the source image.
# xmin / xmax: box centre as a fraction of the image width / height.
# ymin / ymax: box width / height as a fraction of the image width / height.
def ScaleCovertor(width,height,xmin,xmax,ymin,ymax):
    """Return the box corners ``(xmin, ymin, xmax, ymax)`` as strings of whole pixels."""
    # Scale the fractional centre and extent to rounded pixel values.
    centre = (round(float(xmin * width)), round(float(xmax * height)))
    extent = (round(float(ymin * width)), round(float(ymax * height)))

    # Half the extent on either side of the centre; int() truncates toward zero.
    low = [str(int(c - e / 2)) for c, e in zip(centre, extent)]
    high = [str(int(c + e / 2)) for c, e in zip(centre, extent)]
    return low[0], low[1], high[0], high[1]

def Main(filepath, image_dir='C:/Users/lenovo/Desktop/yuantu',
         xml_dir='C:/Users/lenovo/Desktop/annotation2/xml'):
    """Convert every labelImg YOLO txt file in ``filepath`` into a Pascal VOC xml file.

    filepath: directory holding the txt annotation files to convert.
    image_dir: directory holding the source images; the image of ``NAME.txt``
        is looked up as ``NAME.jpg`` to obtain its width and height.
    xml_dir: directory receiving the generated ``NAME.xml`` files; it is
        created when missing.

    Class id 0 is written as ``people`` and class id 1 as ``obstacle``; any
    other id raises ``KeyError``. Blank lines in a txt file are ignored.
    """
    # XML templates: header, one <object> entry per box, and closing tag.
    xml_head = '''
    <annotation>
        <folder>Desktop</folder>
        <filename>{}</filename>
        <path>unknonw</path>
        <source>
            <database>unknow</database>
        </source>
        <size>
            <width>{}</width>
            <height>{}</height>
            <depth>3</depth>
        </size>
        <segmented>0</segmented>
    '''
    xml_obj = '''
        <object>        
            <name>{}</name>
            <pose>no</pose>
            <truncated>0</truncated>
            <difficult>0</difficult>
            <bndbox>
                <xmin>{}</xmin>
                <ymin>{}</ymin>
                <xmax>{}</xmax>
                <ymax>{}</ymax>
            </bndbox>
        </object>
    '''
    xml_end = '''
    </annotation>'''

    # Mapping from YOLO class id to the label written into the xml.
    label = {1: 'obstacle', 0: 'people'}
    os.makedirs(xml_dir, exist_ok=True)

    counter = 1  # running number of the file being processed
    for filename in os.listdir(filepath):  # filename still carries its extension
        print ('Processing:->>',filename,'Number %s'%counter)

        # splitext() keeps dots inside the stem, unlike split('.')[0].
        stem = os.path.splitext(filename)[0]
        image_name = stem + '.jpg'

        # One annotation per non-blank line: class, centre ratios, box size ratios.
        with open(os.path.join(filepath, filename), 'r') as annotation_file:
            content = [line for line in annotation_file if line.strip()]
        w, h = ImgSize(os.path.join(image_dir, image_name))

        head = xml_head.format(image_name, str(w), str(h))
        obj = ''
        for info in content:
            # split() without arguments also drops the trailing newline.
            Class, XMin, XMax, YMin, YMax = info.split()[:5]
            xmin, ymin, xmax, ymax = ScaleCovertor(w, h, float(XMin), float(XMax), float(YMin), float(YMax))
            obj += xml_obj.format(label[int(Class)], xmin, ymin, xmax, ymax)

        # Joining the path puts the file inside xml_dir; plain concatenation
        # produced "...annotation2/xmlNAME.xml" next to the folder instead.
        with open(os.path.join(xml_dir, stem + '.xml'), 'w') as xmw:
            xmw.write(head + obj + xml_end)
        counter += 1
Main('C:/Users/lenovo/Desktop/annotation2/txt')  # directory of txt files

在這裡插入圖片描述

#驗證轉的對錯
import matplotlib.pyplot as plt
import matplotlib.image as Image #這個讀取庫比較方便 不用把資料轉來轉去,plt可以直接使用
%matplotlib inline
# Load the source image that the converted annotation belongs to.
img = Image.imread('/Users/Desktop/annotation2/test/yuantu/'+'20190721062948_000394_cc8cdaa5ee38.jpg')
# Corner coordinates of the box to check, picked by hand from a generated xml.
x1,y1,x2,y2 = 1344, 495, 1722, 1080

# Outline the box in red on the current axes, then draw the image and show the figure.
box = plt.Rectangle(xy=(x1,y1),width=x2-x1,height=y2-y1,fill=False,edgecolor='red',linewidth=2)
axes = plt.gca()
axes.add_patch(box)

plt.imshow(img)
plt.show()  # add or drop depending on the environment

在這裡插入圖片描述

2.2 刪除 w label

import re 
import os
rawfolder='123'   # folder holding the source xml files
newfolder='33333' # folder receiving the rewritten xml files
os.makedirs(newfolder, exist_ok=True)
for i in os.listdir(rawfolder):
    print (i)   # name of the xml file being processed

    with open(rawfolder+'/'+i,'r') as r:
        content = r.readlines()

    # Walk the lines once, buffering each <object>...</object> block so it can
    # be dropped as a whole when it carries the label "w". This removes every
    # such object regardless of how many lines the block spans.
    final = []
    current_object = None   # lines of the <object> currently being read
    drop_current = False
    removed = 0
    for c, j in enumerate(content):
        if current_object is None:
            if '<object>' in j:
                current_object = [j]
                drop_current = False
            else:
                final.append(j)
            continue
        current_object.append(j)
        if '<name>w</name>' in j:
            print (j,'下標-》',c)   # c is the 0-based line index of the match
            drop_current = True
        if '</object>' in j:
            if drop_current:
                removed += 1
            else:
                final.extend(current_object)
            current_object = None
    if current_object is not None:
        # Unterminated <object>: keep its lines rather than lose data.
        final.extend(current_object)

    # Only files that contained a "w" object are written. 'w' mode makes a
    # re-run overwrite the file instead of appending a second copy.
    if removed:
        with open(newfolder+'/'+i,'w') as w:
            w.writelines(final)

在這裡插入圖片描述
在這裡插入圖片描述

2.3 檢查不是people和obstacle的label

# 檢查不是people和obstacle的label
import re 
import os
rawfolder='123'
# Labels regarded as valid; any <name> line containing neither is reported.
allowed_labels = ('people', 'obstacle')
for i in os.listdir(rawfolder):
    with open(rawfolder+'/'+i,'r') as r:
        content = r.readlines()
        for j in content:
            if '<name>' not in j:
                continue
            if any(label in j for label in allowed_labels):
                continue
            # Print the offending line followed by the file it was found in.
            print (j)
            print (i)

在這裡插入圖片描述

2.4 讀取指定字尾

import os
def get_filePathList(dirPath, partOfFileName=''):
    """Return paths of the files directly inside ``dirPath`` whose name contains ``partOfFileName``.

    Only the top level of ``dirPath`` is inspected. Each returned path is
    ``dirPath`` joined with the file name; with the default ``''`` every file
    is returned.
    """
    # The first entry produced by os.walk() describes dirPath itself.
    _top, _subdirs, top_level_names = next(os.walk(dirPath))
    matching_paths = []
    for name in top_level_names:
        if partOfFileName in name:
            matching_paths.append(os.path.join(dirPath, name))
    return matching_paths

# Folder to scan; the call below collects the paths of files whose name contains '.JPG'.
dirPath='C:/Users/lenovo/Desktop/lian'
a=get_filePathList(dirPath,'.JPG')
# Bare expression: has no effect in a script; presumably meant to display the list in a notebook cell.
a
#print(len(a))

在這裡插入圖片描述

2.5 檢查是否有圖片漏標,並刪除漏標圖片

def delete_file(filePath):
    """Print whether ``filePath`` exists; nothing is actually removed.

    For an existing path the message says the file must be deleted by hand,
    otherwise it says the path does not exist.
    """
    if os.path.exists(filePath):
        print('%s 這個路徑的檔案需手動刪除' %filePath)
        return
    print('%s 這個檔案路徑不存在,請檢查一下' %filePath)
        
def check_1(dirPath, suffix):
    """Report every image in ``dirPath`` that has no xml annotation of the same name.

    dirPath: directory holding both the xml files and the images.
    suffix: text identifying image files, e.g. ``'.JPG'``.

    Each unannotated image path is passed to ``delete_file``.
    """
    # Paths of all annotations with their extension removed.
    xmlFilePath_list = get_filePathList(dirPath, '.xml')
    xmlFilePathPrefix_set = {os.path.splitext(k)[0] for k in xmlFilePath_list}

    # Test each real image path directly, so the reported path always carries
    # the image's own extension whatever its length or letter case.
    imageFilePath_list = get_filePathList(dirPath, suffix)
    for imgFilePath in imageFilePath_list:
        if os.path.splitext(imgFilePath)[0] not in xmlFilePathPrefix_set:
            delete_file(imgFilePath)

dirPath='C:/Users/lenovo/Desktop/lx'
check_1(dirPath,'.JPG')

在這裡插入圖片描述

2.6 檢測標記的box是否超過圖片的邊界,若有則顯示刪除與box相關的xml檔案和圖片檔案

import xml.etree.ElementTree as ET
from PIL import Image
def check_2(dirPath, suffix):
    """Report xml/image pairs in ``dirPath`` whose boxes fall outside the image.

    dirPath: directory holding the xml files and their images side by side.
    suffix: image extension, with or without the leading dot (e.g. ``'.JPG'``).

    A box is accepted only when ``0 <= xmin < xmax <= width`` and
    ``0 <= ymin < ymax <= height``. On the first bad box of a file, the xml
    and the image are passed to ``delete_file`` and the next file is checked.
    A success message is printed when every file passes.
    """
    xmlFilePath_list = get_filePathList(dirPath, '.xml')

    allFileCorrect = True
    for xmlFilePath in xmlFilePath_list:
        # The image shares the xml file's name, with the requested extension.
        imageFilePath = xmlFilePath[:-4] + '.' + suffix.strip('.')

        # Image.open() keeps the file open; release it once the size is known.
        with Image.open(imageFilePath) as image:
            width, height = image.size

        # ET.parse() decodes the file itself, honouring the encoding in the
        # XML declaration rather than the platform default.
        root = ET.parse(xmlFilePath).getroot()
        for object_item in root.findall('object'):
            bndbox = object_item.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            # The lower bound matters as well: a negative corner lies outside
            # the image just like one beyond width/height.
            if 0 <= xmin < xmax <= width and 0 <= ymin < ymax <= height:
                continue
            delete_file(xmlFilePath)
            delete_file(imageFilePath)
            allFileCorrect = False
            break

    if allFileCorrect:
        print('祝賀你! 已經通過檢驗,所有xml檔案中的標註框都沒有越界')
dirPath='C:/Users/lenovo/Desktop/lx'  # the lx folder holds the .xml and .JPG files together
check_2(dirPath,'.JPG')  # the suffix selects which image extension is paired with each xml

在這裡插入圖片描述

2.7 檢查xmin<0…,並修改xmin…

#coding=utf-8
import os
import shutil
import random
from xml.etree.ElementTree import ElementTree,Element
import cv2

def read_xml(in_path):
  '''
    Read and parse an xml file.
    in_path: path of the xml file
    return: the ElementTree holding the parsed document
  '''
  document = ElementTree()
  document.parse(in_path)
  return document

def check(url="C:/Users/lenovo/Desktop/source/xml_sum"):
    """Print the name of every xml file in ``url`` that has a suspicious annotation.

    url: directory containing only xml files.

    A file name is printed when the file has no ``<object>``, when an object
    has no ``<bndbox>``, when a box has ``xmin <= 0``, ``ymin <= 0``,
    ``xmin >= xmax`` or ``ymin >= ymax``, and when a box extends past the
    width/height stored in ``<size>``. A name may be printed more than once.
    """
    for item in os.listdir(url):  # item is the name of an xml file
        root = read_xml(url + "/" + item).getroot()
        size = root.find("size")
        width = int(size.find("width").text)
        height = int(size.find("height").text)

        # findall() returns a list, never None, so test for emptiness.
        objects = root.findall("object")
        if not objects:
            print(item)
            continue
        for it in objects:
            bndbox = it.find("bndbox")
            if bndbox is None:
                # Nothing to measure; report it and move to the next object.
                print(item)
                continue
            xmin = int(bndbox.find("xmin").text)
            xmax = int(bndbox.find("xmax").text)
            ymin = int(bndbox.find("ymin").text)
            ymax = int(bndbox.find("ymax").text)
            if xmin <= 0 or xmin >= xmax or ymin <= 0 or ymin >= ymax:
                print(item)
            if xmax > width or ymax > height:
                print(item)

if __name__ =='__main__':
    check()  # no output means every file passed; otherwise bare file names are printed
import xml.etree.ElementTree as ET
def generateNewXmlFile(old_xmlFilePath, new_xmlFilePath):
    """Write a copy of an annotation xml with every box coordinate increased by 1.

    old_xmlFilePath: xml file to read.
    new_xmlFilePath: where the shifted copy is written.

    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` of the ``<bndbox>`` of every
    ``<object>`` are read as integers and incremented by one.
    """
    with open(old_xmlFilePath) as source:
        root = ET.XML(source.read())
    for object_item in root.findall('object'):
        bndbox = object_item.find('bndbox')
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            node = bndbox.find(tag)
            node.text = str(int(node.text) + 1)
    ET.ElementTree(root).write(new_xmlFilePath)
old_dirPath ='C:/Users/lenovo/Desktop/999/8'
new_dirPath ='C:/Users/lenovo/Desktop/999/9'

def batch_modify_xml(old_dirPath, new_dirPath):
    """Rewrite every xml file of ``old_dirPath`` into ``new_dirPath`` under the same file name.

    Each file is converted by ``generateNewXmlFile``.
    """
    for source_path in get_filePathList(old_dirPath, '.xml'):
        # Keep only the file name, e.g. 20190720073950_000257_cc8cdaa64390.xml.
        target_path = os.path.join(new_dirPath, os.path.basename(source_path))
        generateNewXmlFile(source_path, target_path)

batch_modify_xml(old_dirPath, new_dirPath)

2.8 讀取classname

def get_classNameList(txtFilePath):
    """Return the class names listed in ``txtFilePath``, sorted ascending.

    The file is read as UTF-8 with one name per line. Surrounding whitespace
    is stripped and lines that are empty after stripping are ignored.
    """
    with open(txtFilePath, 'r', encoding='utf8') as file:
        raw_lines = file.read().split('\n')
    stripped = (line.strip() for line in raw_lines)
    return sorted(name for name in stripped if name != '')
# Path of the class-name text file passed to get_classNameList below.
txtFilePath='C:/Users/lenovo/Desktop/labelImg/data/predefined_classes -outofstock.txt'
# The return value is not stored; presumably the call is meant to be displayed as notebook cell output.
get_classNameList(txtFilePath)

在這裡插入圖片描述

import os
# Split a path into its directory part and its final component.
samplePath = "E:/lpthw/zedshaw/ex19.py"
pathnoname = os.path.dirname(samplePath)
name = os.path.basename(samplePath)
print(pathnoname)
print(name)

在這裡插入圖片描述

# Add an entry to the module search path (sys.path); this is not an OS environment variable.
import sys
# NOTE(review): '' is a placeholder here — presumably to be replaced with the directory to import from.
sys.path.append('')

2.9 檢查trainval.txt

import cv2
from os import listdir
from os.path import isfile,isdir,join

# Build the list of image file names: one entry per line of trainval.txt,
# with the trailing newline removed and '.jpg' appended.
with open('./trainval.txt','r') as f:
    trainval_list = [line.strip('\n') + '.jpg' for line in f]
print(trainval_list)

在這裡插入圖片描述

# Try to load every image named in trainval_list from ./img3/ and report the
# ones that cannot be read.
for i in trainval_list:
    img_path = '{}{}'.format('./img3/',i)
    img = cv2.imread(img_path)
    # cv2.imread() returns None for a missing or unreadable image instead of
    # raising, so test for None rather than catching every exception.
    if img is None:
        print('fail read:' + img_path)
        continue
    print(img.shape)

在這裡插入圖片描述
B站/知乎/微信公眾號:碼農程式設計錄
在這裡插入圖片描述