Scrapling provides intuitive methods to traverse the DOM tree. You can move up to parent elements, down to children, sideways to siblings, or iterate through ancestors.
Returns the direct parent element or None if the element has no parent.
from scrapling import Fetcher

page = Fetcher.fetch('https://example.com')

# Get parent of an element
link = page.css('a.download').first

# Check both explicitly: presumably `.first` is None when the selector matches
# nothing (confirm against the Selectors API), and `parent` is None when the
# element has no parent.
if link is not None and link.parent is not None:
    parent = link.parent
    print(f"Parent tag: {parent.tag}")
    print(f"Parent classes: {parent.attrib.get('class')}")
Get the full path from root to the current element.
@property
def path() -> Selectors
Returns a list of type Selectors that contains the path leading to the current element from the root.
# Get element path
element = page.css('.deep-nested-item').first
path = element.path

# Print path: one line per element on the way from the root
for level, ancestor in enumerate(path):
    print(f"Level {level}: {ancestor.tag}")
Returns the direct child elements of the current element, or an empty list if it has none.
# Get all children
container = page.css('.container').first
children = container.children
print(f"Number of children: {len(children)}")

# Report the tag of every direct child
for child in children:
    print(f"Child tag: {child.tag}")
Get all descendants (not just direct children) under the current element.
@property
def below_elements() -> Selectors
Returns all elements under the current element in the DOM tree.
# Get all elements under a container
section = page.css('section.content').first
all_descendants = section.below_elements
print(f"Total descendants: {len(all_descendants)}")

# Find all links in descendants
links = all_descendants.filter(lambda e: e.tag == 'a')
Get all sibling elements (other children of the same parent).
@property
def siblings() -> Selectors
Returns the other children of the current element’s parent, or an empty list if there are none.
# Get all siblings
element = page.css('.active-item').first
siblings = element.siblings

# Report the tag and class attribute of every sibling
for sibling in siblings:
    print(f"Sibling: {sibling.tag} - {sibling.attrib.get('class')}")
Extract breadcrumb information using parent navigation:
# Get breadcrumb path
current_page = page.css('.breadcrumb .active').first

# Walk up the tree, keeping the text of every ancestor marked as a breadcrumb item
breadcrumbs = [
    ancestor.text.strip()
    for ancestor in current_page.iterancestors()
    if ancestor.has_class('breadcrumb-item')
]

# Reverse the collected order so the last ancestor visited is printed first
# (same result as inserting each match at the front of the list).
breadcrumbs.reverse()

print(' > '.join(breadcrumbs))
# Find a specific cell and navigate to other cells in the same row
price_cell = page.find_by_text('$99.99', first_match=True)
if price_cell:
    # Get the row
    row = price_cell.find_ancestor(lambda e: e.tag == 'tr')
    if row:
        # Get all cells in the row
        cells = row.css('td')

        # Only read the three columns when the row actually has them;
        # indexing a shorter row would raise IndexError.
        if len(cells) >= 3:
            product_name = cells[0].text
            product_price = cells[1].text
            product_stock = cells[2].text
            print(f"{product_name}: {product_price} ({product_stock})")
# Find active menu item and get its siblings
active_item = page.css('.menu-item.active').first
if active_item:
    # Get parent menu
    parent_menu = active_item.parent

    # Get all menu items at this level
    menu_items = active_item.siblings

    # Include the active item too (it is listed first, ahead of its siblings)
    all_items = [active_item, *menu_items]

    for item in all_items:
        link = item.css('a').first
        # Skip menu items that contain no link
        if link:
            print(f"{link.text}: {link.attrib.get('href')}")
# Process list items with previous/next context
list_items = page.css('ul.timeline li')
for item in list_items:
    current_text = item.text.strip()

    # Get previous item for context
    previous_item = item.previous
    if previous_item is not None:
        prev_text = previous_item.text.strip()
        print(f"After '{prev_text}': {current_text}")
    else:
        print(f"First: {current_text}")

    # Check if there's a next item
    if item.next is None:
        print("(This is the last item)")
# Find a heading and get the content until the next heading
heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')  # every HTML heading level

heading = page.find_by_text('Description', first_match=True)
if heading:
    content_parts = []
    current = heading.next

    # Collect content until next heading (or until there is nothing left)
    while current is not None and current.tag not in heading_tags:
        content_parts.append(current.text.strip())
        current = current.next

    description = ' '.join(content_parts)
    print(description)
def is_first_child(element):
    """Return True when ``element.previous`` is None, i.e. nothing precedes it."""
    return element.previous is None


def is_last_child(element):
    """Return True when ``element.next`` is None, i.e. nothing follows it."""
    return element.next is None


def get_position(element):
    """Get 0-based position among siblings"""
    position = 0
    current = element.previous
    # Count one step for every element reachable by walking `previous` links
    while current is not None:
        position += 1
        current = current.previous
    return position


# Usage
item = page.css('.item').first
if is_first_child(item):
    print("This is the first item")
print(f"Position: {get_position(item)}")
def find_common_ancestor(element1, element2):
    """Find the closest common ancestor of two elements

    Returns the first ancestor of ``element2`` that is also an ancestor of
    ``element1``, or None when they share none.
    """
    # NOTE(review): the set lookup relies on element objects that wrap the same
    # node being hashable and comparing equal - confirm against the Selector API.
    ancestors1 = set(element1.iterancestors())
    for ancestor in element2.iterancestors():
        if ancestor in ancestors1:
            return ancestor
    return None


# Usage
elem1 = page.css('.item-1').first
elem2 = page.css('.item-2').first
common = find_common_ancestor(elem1, elem2)
if common is not None:
    print(f"Common ancestor: {common.tag}")