[TIPS] Refactoring - Clean Code - Tip 1 - DRY
By JoeVu, at: June 26, 2024, 9:39
Refactoring Tip 1: Follow the DRY Principle (Don't Repeat Yourself)
- Junior: May write repetitive code without abstracting common functionality.
- Senior: Identifies repetitive patterns and abstracts them into functions or classes.
The DRY principle is fundamental to writing clean, maintainable code. Here are some examples illustrating the difference between how a junior and a senior developer might apply it.
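Before the larger examples, here is a minimal (hypothetical) sketch of the idea: the discount computation below appears twice, so it gets abstracted into one named function.

# Repetitive: the same computation is written out twice.
price, quantity = 10.0, 3
regular_total = price * quantity * (1 - 0.05)
member_total = price * quantity * (1 - 0.10)

# DRY: the shared computation is written once and named.
def discounted_total(price, quantity, discount):
    return price * quantity * (1 - discount)

regular_total = discounted_total(price, quantity, 0.05)
member_total = discounted_total(price, quantity, 0.10)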
Example 1: Refactoring a Data Processing Pipeline
Junior Developer's Approach
A junior developer might write a data processing script with repetitive code for handling different data sources.
def process_cssv(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(line.strip().split(','))
    return data

def process_json(file_path):
    import json
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

def process_xml(file_path):
    import xml.etree.ElementTree as ET
    tree = ET.parse(file_path)
    root = tree.getroot()
    data = [{child.tag: child.text for child in elem} for elem in root]
    return data

# Usage
csv_data = process_csv('data.csv')
json_data = process_json('data.json')
xml_data = process_xml('data.xml')
Senior Developer's Approach
A senior developer would recognize the common pattern in these functions and refactor the code to use a more modular approach.
import json
import xml.etree.ElementTree as ET

class DataProcessor:
    def process(self, file_path):
        raise NotImplementedError("This method should be overridden by subclasses")

class CSVProcessor(DataProcessor):
    def process(self, file_path):
        data = []
        with open(file_path, 'r') as file:
            for line in file:
                data.append(line.strip().split(','))
        return data

class JSONProcessor(DataProcessor):
    def process(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data

class XMLProcessor(DataProcessor):
    def process(self, file_path):
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = [{child.tag: child.text for child in elem} for elem in root]
        return data

def process_file(file_path, processor):
    return processor.process(file_path)

# Usage
csv_processor = CSVProcessor()
json_processor = JSONProcessor()
xml_processor = XMLProcessor()

csv_data = process_file('data.csv', csv_processor)
json_data = process_file('data.json', json_processor)
xml_data = process_file('data.xml', xml_processor)
###############################################
# Or we can do something like this:
import os

class FileTypeMixin:
    @staticmethod
    def get_processor(file_path):
        _, ext = os.path.splitext(file_path)
        if ext == '.csv':
            return CSVProcessor()
        elif ext == '.json':
            return JSONProcessor()
        elif ext == '.xml':
            return XMLProcessor()
        else:
            raise ValueError(f"Unsupported file type: {ext}")

def process_file(file_path):
    processor = FileTypeMixin.get_processor(file_path)
    return processor.process(file_path)

# Usage
csv_data = process_file('data.csv')
json_data = process_file('data.json')
xml_data = process_file('data.xml')
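A further refinement, sketched below on the assumption that the processor classes above are in scope: replace the if/elif chain with an extension-to-class mapping, so supporting a new format only means adding one dictionary entry. The PROCESSOR_MAP name is just illustrative.

import os

# Minimal sketch: map file extensions to processor classes.
PROCESSOR_MAP = {
    '.csv': CSVProcessor,
    '.json': JSONProcessor,
    '.xml': XMLProcessor,
}

def process_file(file_path):
    _, ext = os.path.splitext(file_path)
    try:
        processor_class = PROCESSOR_MAP[ext.lower()]
    except KeyError:
        raise ValueError(f"Unsupported file type: {ext}")
    return processor_class().process(file_path)

# Usage
csv_data = process_file('data.csv')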
Example 2: Refactoring a Web Scraping Script
Junior Developer's Approach
A junior developer might write a web scraping script with repetitive code for handling different web pages.
import requests
from bs4 import BeautifulSoup

def scrape_page1(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('div', class_='data1')
    return [item.text for item in data]

def scrape_page2(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('span', class_='data2')
    return [item.text for item in data]

# Usage
data1 = scrape_page1('https://example.com/page1')
data2 = scrape_page2('https://example.com/page2')
Senior Developer's Approach
A senior developer would abstract the common scraping logic into a base class and create specific scrapers for different pages.
import requests
from bs4 import BeautifulSoup

class BaseScraper:
    def scrape(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        return self.parse(soup)

    def parse(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")

class Page1Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('div', class_='data1')
        return [item.text for item in data]

class Page2Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('span', class_='data2')
        return [item.text for item in data]

def scrape_url(url, scraper):
    return scraper.scrape(url)

# Usage
page1_scraper = Page1Scraper()
page2_scraper = Page2Scraper()

data1 = scrape_url('https://example.com/page1', page1_scraper)
data2 = scrape_url('https://example.com/page2', page2_scraper)
#############################################################
# Or something else:
class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        if 'page1' in url:
            return Page1Scraper()
        elif 'page2' in url:
            return Page2Scraper()
        else:
            raise ValueError(f"Unsupported URL type: {url}")

def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)

# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')
#############################################################
# Or, better, a mapping-driven factory:
class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        urls_map = {
            'page1': Page1Scraper,
            'page2': Page2Scraper,
        }
        for page_name, page_scraper_class in urls_map.items():
            if page_name in url:
                return page_scraper_class()
        raise ValueError(f"Unsupported URL type: {url}")

def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)

# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')
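If every page differs only in the tag and CSS class being extracted, the per-page subclasses themselves become repetition. The sketch below (the ConfigurableScraper name is hypothetical; pages needing richer parsing would still warrant subclasses) turns the selector into configuration instead:

import requests
from bs4 import BeautifulSoup

class ConfigurableScraper:
    def __init__(self, tag, css_class):
        self.tag = tag              # HTML tag to search for
        self.css_class = css_class  # CSS class to match

    def scrape(self, url):
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return [item.text for item in soup.find_all(self.tag, class_=self.css_class)]

# Usage
data1 = ConfigurableScraper('div', 'data1').scrape('https://example.com/page1')
data2 = ConfigurableScraper('span', 'data2').scrape('https://example.com/page2')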
Example 3: Refactoring API Integration
Junior Developer's Approach
A junior developer might write code to integrate with different APIs with repetitive request handling.
import requests

def get_user_data(api_url):
    response = requests.get(api_url + '/user')
    return response.json()

def get_order_data(api_url):
    response = requests.get(api_url + '/order')
    return response.json()

def get_product_data(api_url):
    response = requests.get(api_url + '/product')
    return response.json()

# Usage
user_data = get_user_data('https://api.example.com')
order_data = get_order_data('https://api.example.com')
product_data = get_product_data('https://api.example.com')
Senior Developer's Approach
A senior developer would refactor the code to use a common API client class.
import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

class UserService:
    def __init__(self, client):
        self.client = client

    def get_user_data(self):
        return self.client.get('user')

class OrderService:
    def __init__(self, client):
        self.client = client

    def get_order_data(self):
        return self.client.get('order')

class ProductService:
    def __init__(self, client):
        self.client = client

    def get_product_data(self):
        return self.client.get('product')

# Usage
api_client = APIClient('https://api.example.com')

user_service = UserService(api_client)
order_service = OrderService(api_client)
product_service = ProductService(api_client)

user_data = user_service.get_user_data()
order_data = order_service.get_order_data()
product_data = product_service.get_product_data()
#############################################################
# Or, more simply, a single client class:
import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

    def get_user_data(self):
        return self.get('user')

    def get_order_data(self):
        return self.get('order')

    def get_product_data(self):
        return self.get('product')

# Usage
api_client = APIClient('https://api.example.com')

user_data = api_client.get_user_data()
order_data = api_client.get_order_data()
product_data = api_client.get_product_data()
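One more refinement worth sketching, on the assumption that every endpoint returns JSON: the three wrapper methods are themselves repetitive, so a single parametrized accessor can replace them, and a requests.Session reuses one connection pool across calls instead of reconnecting per request.

import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url
        self.session = requests.Session()  # one connection pool, reused across calls

    def get(self, endpoint):
        response = self.session.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

# Usage: the endpoint name is now just a parameter.
api_client = APIClient('https://api.example.com')
user_data = api_client.get('user')
order_data = api_client.get('order')
product_data = api_client.get('product')

Whether to keep named methods like get_user_data is a readability trade-off: they document the available endpoints at the cost of a little repetition.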
These examples demonstrate how a senior developer's approach to refactoring can lead to more modular, maintainable, and reusable code, following principles such as DRY, single responsibility, and clear separation of concerns.