[TIPS] Refactoring - Clean Code - Tip 1 - DRY

By JoeVu, at: June 26, 2024, 9:39 a.m.

Estimated Reading Time: 7 min read


Refactoring tip 1: Follow the DRY Principle (Don't Repeat Yourself)

  • Junior: May write repetitive code without abstracting common functionality.
     
  • Senior: Identifies repetitive patterns and abstracts them into functions or classes.

The DRY principle is fundamental to writing clean, maintainable code. The examples below illustrate the difference between how a junior and a senior developer might apply it.

 

Example 1: Refactoring a Data Processing Pipeline


Junior Developer's Approach

A junior developer might write a data processing script with repetitive code for handling different data sources.

def process_csv(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(line.strip().split(','))
    return data


def process_json(file_path):
    import json
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data


def process_xml(file_path):
    import xml.etree.ElementTree as ET
    tree = ET.parse(file_path)
    root = tree.getroot()
    data = [{child.tag: child.text for child in elem} for elem in root]
    return data


# Usage
csv_data = process_csv('data.csv')
json_data = process_json('data.json')
xml_data = process_xml('data.xml')

 

Senior Developer's Approach

A senior developer would recognize the common pattern in these functions and refactor the code to use a more modular approach.

import json
import xml.etree.ElementTree as ET


class DataProcessor:
    def process(self, file_path):
        raise NotImplementedError("This method should be overridden by subclasses")


class CSVProcessor(DataProcessor):
    def process(self, file_path):
        data = []
        with open(file_path, 'r') as file:
            for line in file:
                data.append(line.strip().split(','))
        return data


class JSONProcessor(DataProcessor):
    def process(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data


class XMLProcessor(DataProcessor):
    def process(self, file_path):
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = [{child.tag: child.text for child in elem} for elem in root]
        return data


def process_file(file_path, processor):
    return processor.process(file_path)


# Usage
csv_processor = CSVProcessor()
json_processor = JSONProcessor()
xml_processor = XMLProcessor()

csv_data = process_file('data.csv', csv_processor)
json_data = process_file('data.json', json_processor)
xml_data = process_file('data.xml', xml_processor)
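
As a side note, Python's abc module can make the "must override" contract explicit at instantiation time instead of failing later at call time. A minimal sketch of an alternative base class; the subclasses would inherit from it exactly as before:

from abc import ABC, abstractmethod


class DataProcessor(ABC):
    @abstractmethod
    def process(self, file_path):
        """Parse the file at file_path and return its records."""


# Instantiating a subclass that forgets to implement process()
# now raises TypeError immediately, rather than raising
# NotImplementedError only when process() is first called.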

###############################################
# Alternatively, select the processor from the file extension

import os


class FileTypeMixin:
    @staticmethod
    def get_processor(file_path):
        _, ext = os.path.splitext(file_path)
        if ext == '.csv':
            return CSVProcessor()
        elif ext == '.json':
            return JSONProcessor()
        elif ext == '.xml':
            return XMLProcessor()
        else:
            raise ValueError(f"Unsupported file type: {ext}")


def process_file(file_path):
    processor = FileTypeMixin.get_processor(file_path)
    return processor.process(file_path)


# Usage
csv_data = process_file('data.csv')
json_data = process_file('data.json')
xml_data = process_file('data.xml')
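
The if/elif chain in get_processor still repeats the same shape for every format, so adding one means touching the branching logic. A dictionary that maps extensions to processor classes reduces the factory to a single lookup; a minimal sketch, reusing the processor classes above (EXTENSION_MAP is an illustrative name):

import os

EXTENSION_MAP = {
    '.csv': CSVProcessor,
    '.json': JSONProcessor,
    '.xml': XMLProcessor,
}


def process_file(file_path):
    _, ext = os.path.splitext(file_path)
    processor_class = EXTENSION_MAP.get(ext)
    if processor_class is None:
        raise ValueError(f"Unsupported file type: {ext}")
    # Instantiate the matching processor and delegate to it
    return processor_class().process(file_path)

Supporting a new format then becomes a one-line addition to the map.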

 

Example 2: Refactoring a Web Scraping Script


Junior Developer's Approach

A junior developer might write a web scraping script with repetitive code for handling different web pages.

import requests
from bs4 import BeautifulSoup


def scrape_page1(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('div', class_='data1')
    return [item.text for item in data]


def scrape_page2(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = soup.find_all('span', class_='data2')
    return [item.text for item in data]


# Usage
data1 = scrape_page1('https://example.com/page1')
data2 = scrape_page2('https://example.com/page2')

 

 

Senior Developer's Approach

A senior developer would abstract the common scraping logic into a base class and create specific scrapers for different pages.

import requests
from bs4 import BeautifulSoup


class BaseScraper:
    def scrape(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        return self.parse(soup)

    def parse(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")


class Page1Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('div', class_='data1')
        return [item.text for item in data]


class Page2Scraper(BaseScraper):
    def parse(self, soup):
        data = soup.find_all('span', class_='data2')
        return [item.text for item in data]


def scrape_url(url, scraper):
    return scraper.scrape(url)


# Usage
page1_scraper = Page1Scraper()
page2_scraper = Page2Scraper()

data1 = scrape_url('https://example.com/page1', page1_scraper)
data2 = scrape_url('https://example.com/page2', page2_scraper)


#############################################################
# Alternatively, select the scraper from the URL

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        if 'page1' in url:
            return Page1Scraper()
        elif 'page2' in url:
            return Page2Scraper()
        else:
            raise ValueError(f"Unsupported URL type: {url}")

def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')


 

#############################################################
# Better: drive the scraper choice from a mapping

class URLTypeMixin:
    @staticmethod
    def get_scraper(url):
        urls_map = {
            'page1': Page1Scraper,
            'page2': Page2Scraper,
        }
        for page_name, page_scraper_class in urls_map.items():
            if page_name in url:
                return page_scraper_class()
        raise ValueError(f"Unsupported URL type: {url}")


def scrape_url(url):
    scraper = URLTypeMixin.get_scraper(url)
    return scraper.scrape(url)


# Usage
data1 = scrape_url('https://example.com/page1')
data2 = scrape_url('https://example.com/page2')
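
Notice that Page1Scraper and Page2Scraper differ only in the tag name and CSS class they search for. When that is the only variation, the subclasses themselves are repetition, and a single parameterized scraper can replace them. A minimal sketch, assuming the pages really do differ only in those two values:

class TagScraper(BaseScraper):
    def __init__(self, tag, css_class):
        # The tag and class become data, so a new page needs
        # new configuration rather than a new subclass.
        self.tag = tag
        self.css_class = css_class

    def parse(self, soup):
        data = soup.find_all(self.tag, class_=self.css_class)
        return [item.text for item in data]


# Usage
data1 = TagScraper('div', 'data1').scrape('https://example.com/page1')
data2 = TagScraper('span', 'data2').scrape('https://example.com/page2')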

 

Example 3: Refactoring API Integration


Junior Developer's Approach

A junior developer might write code to integrate with different APIs with repetitive request handling.

import requests


def get_user_data(api_url):
    response = requests.get(api_url + '/user')
    return response.json()


def get_order_data(api_url):
    response = requests.get(api_url + '/order')
    return response.json()


def get_product_data(api_url):
    response = requests.get(api_url + '/product')
    return response.json()


# Usage
user_data = get_user_data('https://api.example.com')
order_data = get_order_data('https://api.example.com')
product_data = get_product_data('https://api.example.com')

 

Senior Developer's Approach

A senior developer would refactor the code to use a common API client class.

import requests


class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()


class UserService:
    def __init__(self, client):
        self.client = client

    def get_user_data(self):
        return self.client.get('user')


class OrderService:
    def __init__(self, client):
        self.client = client

    def get_order_data(self):
        return self.client.get('order')


class ProductService:
    def __init__(self, client):
        self.client = client

    def get_product_data(self):
        return self.client.get('product')

# Usage
api_client = APIClient('https://api.example.com')
user_service = UserService(api_client)
order_service = OrderService(api_client)
product_service = ProductService(api_client)

user_data = user_service.get_user_data()
order_data = order_service.get_order_data()
product_data = product_service.get_product_data()


#############################################################
# Or fold the endpoint helpers into the client itself

import requests

class APIClient:
    def __init__(self, base_url):
        self.base_url = base_url

    def get(self, endpoint):
        response = requests.get(f"{self.base_url}/{endpoint}")
        response.raise_for_status()
        return response.json()

    def get_user_data(self):
        return self.get('user')

    def get_order_data(self):
        return self.get('order')

    def get_product_data(self):
        return self.get('product')

# Usage
api_client = APIClient('https://api.example.com')

user_data = api_client.get_user_data()
order_data = api_client.get_order_data()
product_data = api_client.get_product_data()
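
The same DRY thinking applies to the HTTP plumbing itself: each call above opens a fresh connection and would have to repeat any shared headers or timeouts. A requests.Session keeps that configuration in one place; a minimal sketch of the client, with the header and timeout values as illustrative assumptions:

import requests


class APIClient:
    def __init__(self, base_url, timeout=10):
        self.base_url = base_url
        self.timeout = timeout
        # One Session reuses connections and carries shared
        # headers, so per-request setup is written once.
        self.session = requests.Session()
        self.session.headers.update({'Accept': 'application/json'})

    def get(self, endpoint):
        response = self.session.get(
            f"{self.base_url}/{endpoint}", timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()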


These examples demonstrate how a senior developer's approach to refactoring can lead to more modular, maintainable, and reusable code, following principles such as DRY, single responsibility, and clear separation of concerns.

 

