Migrating Jenkins SCM from Bitbucket to GitLab: Bulk SCM URL Replacement

When migrating from one source control management (SCM) system to another, one of the most tedious tasks is updating all your Jenkins job configurations to point to the new repository URLs. This article presents a solution to automate this process using Python and the Jenkins API, making your migration to GitLab smooth and efficient.
The Challenge
Imagine you have hundreds of Jenkins jobs configured to pull from your old SCM system, and you need to update all of them to use GitLab repositories instead. Doing this manually would be:
- Time-consuming
- Error-prone
- Disruptive to your development workflow
Our solution uses Python scripts to:
- Inventory all Jenkins jobs and their current SCM URLs
- Replace old SCM URLs with new GitLab URLs in bulk
Prerequisites
- Python 3.6+
- Jenkins access with API token
- New GitLab repository URLs for the migrated projects
Part 1: Discovering Your Jenkins Jobs
The first step is to create an inventory of all your Jenkins jobs and their SCM URLs. This script recursively traverses your Jenkins folders and extracts the SCM URL from each job's configuration.
import requests
import xml.etree.ElementTree as ET
import csv
from urllib.parse import quote

# Jenkins connection details
JENKINS_URL = "https://jenkins.example.com"
USERNAME = "your_username"
API_TOKEN = "your_api_token"  # Generate this in Jenkins user settings
FOLDER_PATH = "job/folder_name"  # Starting folder path

# Authentication
auth = (USERNAME, API_TOKEN)

def get_folder_jobs(folder_path):
    """Recursively get all jobs in a folder"""
    # URL-encode each path segment while preserving the /job/ separators
    path_parts = folder_path.split("/job/")
    encoded_path = "/job/".join([quote(part) for part in path_parts if part])
    api_url = f"{JENKINS_URL}/{encoded_path.lstrip('/')}/api/json?tree=jobs[name,url,_class]"
    print(f"Requesting: {api_url}")
    response = requests.get(api_url, auth=auth)
    if response.status_code != 200:
        print(f"Error accessing folder: {response.status_code}")
        return []
    data = response.json()
    all_jobs = []
    for job in data.get('jobs', []):
        job_class = job.get('_class', '')
        job_name = job.get('name', '')
        job_url = job.get('url', '')
        # If it's a folder, recursively get its jobs
        if 'folder' in job_class.lower():
            sub_folder_path = f"{folder_path}/job/{job_name}" if folder_path else f"job/{job_name}"
            sub_jobs = get_folder_jobs(sub_folder_path)
            all_jobs.extend(sub_jobs)
        # If it's a job, add it to the list
        elif 'job' in job_class.lower() or 'project' in job_class.lower() or 'workflow' in job_class.lower():
            all_jobs.append({
                'name': job_name,
                'url': job_url,
                'full_path': f"{folder_path}/job/{job_name}" if folder_path else f"job/{job_name}"
            })
    return all_jobs

def get_job_config(job_url):
    """Get the XML configuration of a job"""
    config_url = f"{job_url}config.xml"
    response = requests.get(config_url, auth=auth)
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text

def extract_scm_url(config_xml):
    """Extract the SCM URL from a job's config XML"""
    try:
        root = ET.fromstring(config_xml)
        # Find Git SCM URLs (freestyle and pipeline jobs)
        url_elements = root.findall(".//hudson.plugins.git.UserRemoteConfig/url")
        if url_elements:
            return url_elements[0].text
        # Check for multibranch pipeline SCM URLs
        url_elements = root.findall(".//source/remote")
        if url_elements:
            return url_elements[0].text
        return None
    except Exception as e:
        print(f"Error parsing XML: {e}")
        return None

def main():
    # Get all jobs in the folder
    print(f"Getting jobs from folder: {FOLDER_PATH}")
    jobs = get_folder_jobs(FOLDER_PATH)
    print(f"Found {len(jobs)} jobs")
    # Process each job to extract its SCM URL
    job_scm_urls = []
    for job in jobs:
        print(f"Processing job: {job['name']}")
        config_xml = get_job_config(job['url'])
        if not config_xml:
            continue
        scm_url = extract_scm_url(config_xml)
        if scm_url:
            job_scm_urls.append({
                'name': job['name'],
                'full_path': job['full_path'],
                'url': job['url'],
                'scm_url': scm_url
            })
            print(f"Found SCM URL for {job['name']}: {scm_url}")
        else:
            print(f"No SCM URL found for {job['name']}")
    # Print summary
    print(f"\nFound SCM URLs for {len(job_scm_urls)} jobs out of {len(jobs)} total jobs")
    # Export results to CSV
    with open('jenkins_scm_urls.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Job Name", "Full Path", "Jenkins URL", "SCM URL"])
        for job in job_scm_urls:
            writer.writerow([job['name'], job['full_path'], job['url'], job['scm_url']])
    print("\nExported results to jenkins_scm_urls.csv")

if __name__ == "__main__":
    main()
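Running this script walks the folder tree, prints its progress, and writes jenkins_scm_urls.csv. The rows below are purely illustrative (the job names and Bitbucket URLs are hypothetical placeholders), but they show the shape of the output you can expect:

Job Name,Full Path,Jenkins URL,SCM URL
app-build,job/folder_name/job/app-build,https://jenkins.example.com/job/folder_name/job/app-build/,https://bitbucket.example.com/scm/proj/app.git
api-tests,job/folder_name/job/api-tests,https://jenkins.example.com/job/folder_name/job/api-tests/,https://bitbucket.example.com/scm/proj/api.git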
Part 2: Preparing Your URL Mapping
After running the first script, you'll have a CSV file with all your Jenkins jobs and their current SCM URLs.
Now you need to create a mapping file that specifies the old and new URLs. Create a new CSV file called replace.csv with these columns:
- Jenkins URL
- Old SCM URL
- New SCM URL
Example:
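The rows below are purely illustrative; the Jenkins job URL and the Bitbucket and GitLab repository URLs are hypothetical placeholders to replace with your own values:

Jenkins URL,Old SCM URL,New SCM URL
https://jenkins.example.com/job/folder_name/job/app-build/,https://bitbucket.example.com/scm/proj/app.git,https://gitlab.example.com/group/app.git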
You can use Excel or a script to help generate this mapping based on patterns in your repository structure.
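If your GitLab group layout mirrors your old repository structure, a small script can generate the mapping from the CSV produced in Part 1. The sketch below assumes a simple prefix substitution; OLD_PREFIX and NEW_PREFIX are hypothetical placeholders, not values taken from your instance:

import csv

# Hypothetical placeholders - adjust to your own Bitbucket and GitLab base URLs
OLD_PREFIX = "https://bitbucket.example.com/scm/proj/"
NEW_PREFIX = "https://gitlab.example.com/group/"

with open('jenkins_scm_urls.csv', newline='') as src, \
        open('replace.csv', 'w', newline='') as dst:
    reader = csv.DictReader(src)
    writer = csv.writer(dst)
    writer.writerow(["Jenkins URL", "Old SCM URL", "New SCM URL"])
    for row in reader:
        old_url = row["SCM URL"]
        if old_url.startswith(OLD_PREFIX):
            # Swap the old prefix for the new one, keeping the repository path
            new_url = NEW_PREFIX + old_url[len(OLD_PREFIX):]
            writer.writerow([row["Jenkins URL"], old_url, new_url])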
Part 3: Updating Jenkins Jobs
Now comes the script that updates all your Jenkins jobs to use the new GitLab URLs:
import requests
import xml.etree.ElementTree as ET
import base64
import csv
import time

# Jenkins connection details
JENKINS_URL = "https://jenkins.example.com"
USERNAME = "your_username"
API_TOKEN = "your_api_token"
CSV_FILE = "replace.csv"  # CSV with columns: Jenkins URL, Old SCM URL, New SCM URL

# Authentication headers
headers = {
    'Authorization': 'Basic ' + base64.b64encode(f"{USERNAME}:{API_TOKEN}".encode()).decode(),
    'Content-Type': 'application/xml'
}

def get_job_config(job_url):
    """Get the XML configuration of a job"""
    config_url = f"{job_url}config.xml"
    response = requests.get(config_url, headers=headers)
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text

def update_job_config(job_url, config_xml):
    """Update the XML configuration of a job"""
    config_url = f"{job_url}config.xml"
    response = requests.post(config_url, headers=headers, data=config_xml)
    return response.status_code in [200, 201]

def update_scm_url_in_xml(config_xml, old_url, new_url):
    """Replace the old SCM URL with the new one in the job config XML"""
    try:
        root = ET.fromstring(config_xml)
        updated = False
        # Update Git SCM URLs
        for url_elem in root.findall(".//hudson.plugins.git.UserRemoteConfig/url"):
            if url_elem.text == old_url:
                url_elem.text = new_url
                updated = True
        # Update multibranch pipeline SCM URLs
        for url_elem in root.findall(".//source/remote"):
            if url_elem.text == old_url:
                url_elem.text = new_url
                updated = True
        if updated:
            return ET.tostring(root, encoding='utf-8').decode('utf-8')
        return None
    except Exception as e:
        print(f"Error updating XML: {e}")
        return None

def main():
    updated_jobs = 0
    failed_jobs = 0
    skipped_jobs = 0
    # Read all rows up front so the total can be reported before processing
    with open(CSV_FILE, 'r', newline='') as csvfile:
        rows = list(csv.DictReader(csvfile))
    print(f"Found {len(rows)} jobs to update in {CSV_FILE}")
    for row in rows:
        job_url = row.get('Jenkins URL', '').strip()
        old_scm_url = row.get('Old SCM URL', '').strip()
        new_scm_url = row.get('New SCM URL', '').strip()
        if not job_url or not old_scm_url or not new_scm_url:
            print(f"Skipping row with missing data: {row}")
            skipped_jobs += 1
            continue
        print(f"Processing job: {job_url}")
        print(f"  Changing SCM from: {old_scm_url}")
        print(f"  To: {new_scm_url}")
        # Get current config
        config_xml = get_job_config(job_url)
        if not config_xml:
            print(f"  Failed to get config for {job_url}")
            failed_jobs += 1
            continue
        # Update SCM URL in config
        updated_xml = update_scm_url_in_xml(config_xml, old_scm_url, new_scm_url)
        if not updated_xml:
            print(f"  No matching SCM URL found in config or error updating XML")
            failed_jobs += 1
            continue
        # Push the updated config back to Jenkins
        if update_job_config(job_url, updated_xml):
            print(f"  ✅ Successfully updated {job_url}")
            updated_jobs += 1
        else:
            print(f"  ❌ Failed to update {job_url}")
            failed_jobs += 1
        # Add a small delay to avoid overwhelming the Jenkins server
        time.sleep(1)
    print(f"\nDone: {updated_jobs} updated, {failed_jobs} failed, {skipped_jobs} skipped")

if __name__ == "__main__":
    main()
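Depending on your Jenkins version and security settings, POST requests to config.xml can be rejected with HTTP 403 unless a CSRF crumb is supplied. If you run into that, a crumb can be fetched from Jenkins' standard /crumbIssuer/api/json endpoint. The helper below is a minimal sketch (post_with_crumb is a hypothetical name, not part of the script above) that fetches the crumb and posts within the same session, since newer Jenkins versions tie crumbs to the web session:

def post_with_crumb(config_url, config_xml):
    """POST a config update, attaching a CSRF crumb fetched in the same session."""
    session = requests.Session()
    session.headers.update(headers)
    crumb_resp = session.get(f"{JENKINS_URL}/crumbIssuer/api/json")
    if crumb_resp.status_code == 200:
        crumb_data = crumb_resp.json()
        session.headers[crumb_data["crumbRequestField"]] = crumb_data["crumb"]
    return session.post(config_url, data=config_xml)

You could call this from update_job_config in place of the plain requests.post call if your instance enforces crumb checks.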