@staticmethod def _get(url: str) -> requests.Response: """GET with a tiny retry loop.""" for _ in range(3): try: r = requests.get(url, headers=BaseScraper.HEADERS, timeout=12) r.raise_for_status() return r except requests.RequestException: continue raise RuntimeError(f"Failed to fetch url")
# Add a source‑count field (how many sites host the same file) url_to_count = {} for m in matches: url_to_count[m["url"]] = url_to_count.get(m["url"], 0) + 1 for m in matches: m["source_count"] = url_to_count[m["url"]]
@staticmethod def _clean_link(raw: str) -> str: """Turn relative URLs into absolute ones.""" return raw if raw.startswith("http") else f"https:raw" @staticmethod def _get(url: str) -> requests
class Filmy4wapScraper(BaseScraper): SEARCH_URL = "https://www.filmy4wap.in/search?q=query"
# Apply matching logic matches = match_results(deduped, query_norm) Return jsonify(search_movie(title))
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "%20")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.movie-box") # CSS selector works for current layout results = [] for c in cards: title_tag = c.select_one("h2 a") if not title_tag: continue title = title_tag.get_text(strip=True) href = cls._clean_link(title_tag["href"])
# Example meta: "2022 Hindi 1080p" meta = c.select_one("span.meta") year, language, quality = None, None, None if meta: txt = meta.get_text() m_year = re.search(r"\b(20\d2)\b", txt) year = m_year.group(1) if m_year else None language = "Hindi" if "hindi" in txt.lower() else None qual_match = re.search(r"\b(720p|1080p|4k)\b", txt, re.I) quality = qual_match.group(0) if qual_match else None ) return results results.append( "source": "Filmy4wap"
print(json.dumps(data, ensure_ascii=False, indent=2)) this feature into your existing project | Scenario | Integration steps | |----------|-------------------| | Existing Flask/Django API | 1. Copy the whole file into a module (e.g. movie_finder.py ). 2. Import search_movie inside a view/endpoint. 3. Return jsonify(search_movie(title)) . | | Desktop GUI (Tkinter / PyQt) | 1. Wire a “Search” button to search_movie(user_input) . 2. Populate a table/list with result["title"] , year , quality , and a clickable hyperlink ( result["url"] ). | | Home‑assistant / Node‑RED | 1. Expose the script via a lightweight HTTP server (e.g. uvicorn + FastAPI). 2. Call the endpoint from your automation flow and parse the JSON. | |
results.append( "source": "Filmywap", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results
results.append( "source": "Filmy4wap", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results