[TIPS] Refactoring - Clean Code - Tip 1 - DRY

By JoeVu, at: June 26, 2024, 9:39 a.m.

Estimated Reading Time: 7 min read


Refactoring tip 1: Follow the DRY Principle (Don't Repeat Yourself)

  • Junior: May write repetitive code without abstracting common functionality.
     
  • Senior: Identifies repetitive patterns and abstracts them into functions or classes.

The DRY principle is fundamental to writing clean, maintainable code. The examples below illustrate the difference between how a junior and a senior developer might apply it.

 

Example 1: Refactoring a Data Processing Pipeline


Junior Developer's Approach

A junior developer might write a data processing script with repetitive code for handling different data sources.

def process_csv(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(line.strip().split(','))
    return data


def process_json(file_path):
    import json
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data


def process_xml(file_path):
    import xml.etree.ElementTree as ET
    tree = ET.parse(file_path)
    root = tree.getroot()
    data = [{child.tag: child.text for child in elem} for elem in root]
    return data


# Usage
csv_data = process_csv('data.csv')
json_data = process_json('data.json')
xml_data = process_xml('data.xml')

 

Senior Developer's Approach

A senior developer would recognize the common pattern in these functions and refactor the code to use a more modular approach.

import json
import xml.etree.ElementTree as ET


class DataProcessor:
    def process(self, file_path):
        raise NotImplementedError("This method should be overridden by subclasses")


class CSVProcessor(DataProcessor):
    def process(self, file_path):
        data = []
        with open(file_path, 'r') as file:
            for line in file:
                data.append(line.strip().split(','))
        return data


class JSONProcessor(DataProcessor):
    def process(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data


class XMLProcessor(DataProcessor):
    def process(self, file_path):
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = [{child.tag: child.text for child in elem} for elem in root]
        return data


def process_file(file_path, processor):
    return processor.process(file_path)


# Usage
csv_processor = CSVProcessor()
json_processor = JSONProcessor()
xml_processor = XMLProcessor()

csv_data = process_file('data.csv', csv_processor)
json_data = process_file('data.json', json_processor)
xml_data = process_file('data.xml', xml_processor)
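
As a side note, Python's abc module can make the "must override" contract explicit at instantiation time instead of failing later at call time. A minimal sketch of an alternative base class; the subclasses would inherit from it exactly as before:

from abc import ABC, abstractmethod


class DataProcessor(ABC):
    @abstractmethod
    def process(self, file_path):
        """Parse the file at file_path and return its records."""


# Instantiating a subclass that forgets to implement process()
# now raises TypeError immediately, rather than raising
# NotImplementedError only when process() is first called.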

###############################################
# Alternatively, select the processor from the file extension

import os


class FileTypeMixin:
    @staticmethod
    def get_processor(file_path):
        _, ext = os.path.splitext(file_path)
        if ext == '.csv':
            return CSVProcessor()
        elif ext == '.json':
            return JSONProcessor()
        elif ext == '.xml':
            return XMLProcessor()
        else:
            raise ValueError(f"Unsupported file type: {ext}")


def process_file(file_path):
    processor = FileTypeMixin.get_processor(file_path)
    return processor.process(file_path)


# Usage
csv_data = process_file('data.csv')
json_data = process_file('data.json')
xml_data = process_file('data.xml')
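
The if/elif chain in get_processor still repeats the same shape for every format, so adding one means touching the branching logic. A dictionary that maps extensions to processor classes reduces the factory to a single lookup; a minimal sketch, reusing the processor classes above (EXTENSION_MAP is an illustrative name):

import os

EXTENSION_MAP = {
    '.csv': CSVProcessor,
    '.json': JSONProcessor,
    '.xml': XMLProcessor,
}


def process_file(file_path):
    _, ext = os.path.splitext(file_path)
    processor_class = EXTENSION_MAP.get(ext)
    if processor_class is None:
        raise ValueError(f"Unsupported file type: {ext}")
    # Instantiate the matching processor and delegate to it
    return processor_class().process(file_path)

Supporting a new format then becomes a one-line addition to the map.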

 

Example 2: Refactoring a Web Scraping Script


Junior Developer's Approach

A junior developer might write a web scraping script with repetitive code for handling different web pages.

import requests
from bs4 import BeautifulSoup


def scrape_page1(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('div', class_='data1')
    return [item.text for item in data]


def scrape_page2(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('span', class_='data2')
    return [item.text for item in data]


# Usage
data1 = scrape_page1('https://example.com/page1')
data2 = scrape_page2('https://example.com/page2')

 

 

Senior Developer's Approach

A senior developer would abstract the common scraping logic into a base class and create specific scrapers for different pages.

import requests
from bs4 import BeautifulSoup


class BaseScraper:
    def scrape(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        return self.parse(soup)

    def parse(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")


class Page1Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('div', class_='data1')
        return [item.text for item in data]


class Page2Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('span', class_='data2')
        return [item.text for item in data]


def scrape_url(url, scraper):
    return scraper.scrape(url)


# Usage
page1_scraper = Page1Scraper()
page2_scraper = Page2Scraper()

data1 = scrape_url('https://example.com/page1', page1_scraper)
data2 = scrape_url('https://example.com/page2', page2_scraper)


#############################################################
# Alternatively, select the scraper from the URL

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        if 'page1' in url:
            return Page1Scraper()
        elif 'page2' in url:
            return Page2Scraper()
        else:
            raise ValueError(f"Unsupported URL type: {url}")

def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')


 

#############################################################
# Better: drive the scraper choice from a mapping

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        urls_map = {
            'page1': Page1Scraper,
            'page2': Page2Scraper,
        }
        for page_name, page_scraper_class in urls_map.items():
            if page_name in url:
                return page_scraper_class()
        raise ValueError(f"Unsupported URL type: {url}")


def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')
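
Notice that Page1Scraper and Page2Scraper differ only in the tag name and CSS class they search for. When that is the only variation, the subclasses themselves are repetition, and a single parameterized scraper can replace them. A minimal sketch, assuming the pages really do differ only in those two values:

class TagScraper(BaseScraper):
    def __init__(self, tag, css_class):
        # The tag and class become data, so a new page needs
        # new configuration rather than a new subclass.
        self.tag = tag
        self.css_class = css_class

    def parse(self, soup):
        data = soup.find_all(self.tag, class_=self.css_class)
        return [item.text for item in data]


# Usage
data1 = TagScraper('div', 'data1').scrape('https://example.com/page1')
data2 = TagScraper('span', 'data2').scrape('https://example.com/page2')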

 

Example 3: Refactoring API Integration


Junior Developer's Approach

A junior developer might write code to integrate with different APIs with repetitive request handling.

import requests


def get_user_data(api_url):
    response = requests.get(api_url + '/user')
    return response.json()


def get_order_data(api_url):
    response = requests.get(api_url + '/order')
    return response.json()


def get_product_data(api_url):
    response = requests.get(api_url + '/product')
    return response.json()


# Usage
user_data = get_user_data('https://api.example.com')
order_data = get_order_data('https://api.example.com')
product_data = get_product_data('https://api.example.com')

 

Senior Developer's Approach

A senior developer would refactor the code to use a common API client class.

import requests


class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()


class UserService:
    def __init__(self, client):
        self.client = client

    def get_user_data(self):
        return self.client.get('user')


class OrderService:
    def __init__(self, client):
        self.client = client

    def get_order_data(self):
        return self.client.get('order')


class ProductService:
    def __init__(self, client):
        self.client = client

    def get_product_data(self):
        return self.client.get('product')

# Usage
api_client = APIClient('https://api.example.com')
user_service = UserService(api_client)
order_service = OrderService(api_client)
product_service = ProductService(api_client)

user_data = user_service.get_user_data()
order_data = order_service.get_order_data()
product_data = product_service.get_product_data()


#############################################################
# Or fold the endpoint helpers into the client itself

import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

    def get_user_data(self):
        return self.get('user')

    def get_order_data(self):
        return self.get('order')

    def get_product_data(self):
        return self.get('product')

# Usage
api_client = APIClient('https://api.example.com')

user_data = api_client.get_user_data()
order_data = api_client.get_order_data()
product_data = api_client.get_product_data()
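
The same DRY thinking applies to the HTTP plumbing itself: each call above opens a fresh connection and would have to repeat any shared headers or timeouts. A requests.Session keeps that configuration in one place; a minimal sketch of the client, with the header and timeout values as illustrative assumptions:

import requests


class APIClient:
    def __init__(self, base_url, timeout=10):
        self.base_url = base_url
        self.timeout = timeout
        # One Session reuses connections and carries shared
        # headers, so per-request setup is written once.
        self.session = requests.Session()
        self.session.headers.update({'Accept': 'application/json'})

    def get(self, endpoint):
        response = self.session.get(
            f"{self.base_url}/{endpoint}", timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()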


These examples demonstrate how a senior developer's approach to refactoring can lead to more modular, maintainable, and reusable code, following principles such as DRY, single responsibility, and clear separation of concerns.

 

