[TIPS] Refactoring - Clean Code - Tip 1 - DRY

By JoeVu, at: Jun 26, 2024, 9:39

Estimated Reading Time: 7 min read


Refactoring tip 1: Follow the DRY Principle (Don't Repeat Yourself)

  • Junior: May write repetitive code without abstracting common functionality.
     
  • Senior: Identifies repetitive patterns and abstracts them into functions or classes.

The DRY principle is fundamental to writing clean, maintainable code. Here are a few examples illustrating the difference between how a junior and a senior developer might apply it.

 

Example 1: Refactoring a Data Processing Pipeline


Junior Developer's Approach

A junior developer might write a data processing script with repetitive code for handling different data sources.

def process_csv(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(line.strip().split(','))
    return data


def process_json(file_path):
    import json
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data


def process_xml(file_path):
    import xml.etree.ElementTree as ET
    tree = ET.parse(file_path)
    root = tree.getroot()
    data = [{child.tag: child.text for child in elem} for elem in root]
    return data


# Usage
csv_data = process_csv('data.csv')
json_data = process_json('data.json')
xml_data = process_xml('data.xml')
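# Note the duplication: every function repeats the same open/parse/return
# shape; only the parsing step actually differs.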

 

Senior Developer's Approach

A senior developer would recognize the common pattern in these functions and refactor the code to use a more modular approach.

import json
import xml.etree.ElementTree as ET


class DataProcessor:
    def process(self, file_path):
        raise NotImplementedError("This method should be overridden by subclasses")


class CSVProcessor(DataProcessor):
    def process(self, file_path):
        data = []
        with open(file_path, 'r') as file:
            for line in file:
                data.append(line.strip().split(','))
        return data


class JSONProcessor(DataProcessor):
    def process(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data


class XMLProcessor(DataProcessor):
    def process(self, file_path):
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = [{child.tag: child.text for child in elem} for elem in root]
        return data


def process_file(file_path, processor):
    return processor.process(file_path)


# Usage
csv_processor = CSVProcessor()
json_processor = JSONProcessor()
xml_processor = XMLProcessor()

csv_data = process_file('data.csv', csv_processor)
json_data = process_file('data.json', json_processor)
xml_data = process_file('data.xml', xml_processor)

###############################################
# Alternatively, a factory can pick the processor based on the file extension

import os

class FileTypeMixin:
    @staticmethod
    def get_processor(file_path):
        _, ext = os.path.splitext(file_path)
        if ext == '.csv':
            return CSVProcessor()
        elif ext == '.json':
            return JSONProcessor()
        elif ext == '.xml':
            return XMLProcessor()
        else:
            raise ValueError(f"Unsupported file type: {ext}")


def process_file(file_path):
    processor = FileTypeMixin.get_processor(file_path)
    return processor.process(file_path)


# Usage
csv_data = process_file('data.csv')
json_data = process_file('data.json')
xml_data = process_file('data.xml')
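
A mapping keyed by file extension (mirroring the approach Example 2 uses below) removes the if/elif chain as well. A minimal sketch, where the PROCESSORS name is ours, not from the original:

import os

# Hypothetical mapping-based factory: adding a new format only means
# adding one entry to this dict.
PROCESSORS = {
    '.csv': CSVProcessor,
    '.json': JSONProcessor,
    '.xml': XMLProcessor,
}


def process_file(file_path):
    _, ext = os.path.splitext(file_path)
    processor_class = PROCESSORS.get(ext)
    if processor_class is None:
        raise ValueError(f"Unsupported file type: {ext}")
    return processor_class().process(file_path)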

 

Example 2: Refactoring a Web Scraping Script


Junior Developer's Approach

A junior developer might write a web scraping script with repetitive code for handling different web pages.

import requests
from bs4 import BeautifulSoup


def scrape_page1(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('div', class_='data1')
    return [item.text for item in data]


def scrape_page2(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('span', class_='data2')
    return [item.text for item in data]


# Usage
data1 = scrape_page1('https://example.com/page1')
data2 = scrape_page2('https://example.com/page2')
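# Note the duplication: the request/BeautifulSoup setup is identical in
# both functions; only the selector differs.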

 

 

Senior Developer's Approach

A senior developer would abstract the common scraping logic into a base class and create specific scrapers for different pages.

import requests
from bs4 import BeautifulSoup


class BaseScraper:
    def scrape(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        return self.parse(soup)

    def parse(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")


class Page1Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('div', class_='data1')
        return [item.text for item in data]


class Page2Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('span', class_='data2')
        return [item.text for item in data]


def scrape_url(url, scraper):
    return scraper.scrape(url)


# Usage
page1_scraper = Page1Scraper()
page2_scraper = Page2Scraper()

data1 = scrape_url('https://example.com/page1', page1_scraper)
data2 = scrape_url('https://example.com/page2', page2_scraper)


#############################################################
# Alternatively, a factory can pick the scraper based on the URL

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        if 'page1' in url:
            return Page1Scraper()
        elif 'page2' in url:
            return Page2Scraper()
        else:
            raise ValueError(f"Unsupported URL type: {url}")

def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')


 

#############################################################
# Better still: drive the factory from a mapping instead of an if/elif chain

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        urls_map = {
            'page1': Page1Scraper,
            'page2': Page2Scraper,
        }
        for page_name, page_scraper_class in urls_map.items():
            if page_name in url:
                return page_scraper_class()
        raise ValueError(f"Unsupported URL type: {url}")


def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')
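
Taking the mapping one step further, each scraper can register itself when it is defined, so adding a new page never touches the factory at all. A sketch of this registry pattern (SCRAPER_REGISTRY and register_scraper are hypothetical names, not part of the original code; Page2Scraper would be registered the same way):

# Hypothetical self-registering variant: the decorator records each
# scraper class in a shared registry at class-definition time.
SCRAPER_REGISTRY = {}


def register_scraper(page_name):
    def decorator(cls):
        SCRAPER_REGISTRY[page_name] = cls
        return cls
    return decorator


@register_scraper('page1')
class Page1Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('div', class_='data1')
        return [item.text for item in data]


def scrape_url(url):
    for page_name, scraper_class in SCRAPER_REGISTRY.items():
        if page_name in url:
            return scraper_class().scrape(url)
    raise ValueError(f"Unsupported URL type: {url}")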

 

Example 3: Refactoring API Integration


Junior Developer's Approach

A junior developer might write code to integrate with different APIs with repetitive request handling.

import requests


def get_user_data(api_url):
    response = requests.get(api_url + '/user')
    return response.json()


def get_order_data(api_url):
    response = requests.get(api_url + '/order')
    return response.json()


def get_product_data(api_url):
    response = requests.get(api_url + '/product')
    return response.json()


# Usage
user_data = get_user_data('https://api.example.com')
order_data = get_order_data('https://api.example.com')
product_data = get_product_data('https://api.example.com')
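# Note the duplication: each function repeats the same GET-and-parse-JSON
# pattern; only the endpoint path differs.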

 

Senior Developer's Approach

A senior developer would refactor the code to use a common API client class.

import requests


class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()


class UserService:
    def __init__(self, client):
        self.client = client

    def get_user_data(self):
        return self.client.get('user')


class OrderService:
    def __init__(self, client):
        self.client = client

    def get_order_data(self):
        return self.client.get('order')


class ProductService:
    def __init__(self, client):
        self.client = client

    def get_product_data(self):
        return self.client.get('product')

# Usage
api_client = APIClient('https://api.example.com')
user_service = UserService(api_client)
order_service = OrderService(api_client)
product_service = ProductService(api_client)

user_data = user_service.get_user_data()
order_data = order_service.get_order_data()
product_data = product_service.get_product_data()


#############################################################
# Or: collapse the per-resource services into the client itself

import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

    def get_user_data(self):
        return self.get('user')

    def get_order_data(self):
        return self.get('order')

    def get_product_data(self):
        return self.get('product')

# Usage
api_client = APIClient('https://api.example.com')

user_data = api_client.get_user_data()
order_data = api_client.get_order_data()
product_data = api_client.get_product_data()
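
Taken to its extreme, even the per-resource methods can be dropped, since the endpoint string is the only thing that varies; the trade-off is that named methods such as get_user_data document intent at call sites. A sketch using only the generic get method defined above:

# Usage with the generic client alone - no per-resource wrappers needed.
api_client = APIClient('https://api.example.com')

user_data = api_client.get('user')
order_data = api_client.get('order')
product_data = api_client.get('product')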


These examples demonstrate how a senior developer's approach to refactoring can lead to more modular, maintainable, and reusable code, following principles such as DRY, single responsibility, and clear separation of concerns.

 

